Java Code Examples for org.apache.tika.io.TikaInputStream#close()
The following examples show how to use
org.apache.tika.io.TikaInputStream#close().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TesseractOCRParser.java From CogStack-Pipeline with Apache License 2.0 | 6 votes |
/**
 * Renders an AWT image to a temporary PNG file and hands that file to the
 * stream-based {@code parse} overload.
 *
 * @param image    the image to process; its size is queried with a {@code null}
 *                 observer, so it is expected to be fully loaded already
 * @param handler  content handler forwarded to the delegated parse
 * @param metadata metadata forwarded to the delegated parse
 * @param context  parse context forwarded to the delegated parse
 * @throws IOException   if the temporary PNG cannot be written or read back
 * @throws SAXException  if the delegated parse reports a SAX failure
 * @throws TikaException if the delegated parse fails
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // Copy the source pixels into the buffer; a freshly constructed
        // BufferedImage is blank, so without this step an empty picture
        // would be written to disk and parsed.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();

        // Close the writer before the file is read back so the PNG is
        // complete on disk and no second handle is held on it.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }

        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Dispose last: every stream on the temporary file is closed by now.
        tmp.dispose();
    }
}
Example 2
Source File: ExecUtil.java From ctsms with GNU Lesser General Public License v2.1 | 6 votes |
/**
 * Detects the media type of an in-memory document using the class-level
 * {@code detector}.
 *
 * @param data     raw bytes of the document
 * @param fileName name recorded under the resource-name metadata key
 * @return the string form of the detected media type
 * @throws Throwable any failure raised while opening the stream or detecting
 */
public static String getMimeType(byte[] data, String fileName) throws Throwable {
    Metadata metadata = new Metadata();
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);

    TikaInputStream stream = null;
    try {
        stream = TikaInputStream.get(data, metadata);
        return detector.detect(stream, metadata).toString();
    } finally {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // Best-effort close: a failure here must not mask the
                // detection result or the original exception.
            }
        }
    }
}
Example 3
Source File: ExecUtil.java From ctsms with GNU Lesser General Public License v2.1 | 6 votes |
/**
 * Detects the media type of a file using the class-level {@code detector}.
 *
 * @param file the file to inspect; its name is recorded under the
 *             resource-name metadata key
 * @return the string form of the detected media type
 * @throws Throwable any failure raised while opening the stream or detecting
 */
public static String getMimeType(File file) throws Throwable {
    Metadata metadata = new Metadata();
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, file.getName());

    TikaInputStream stream = null;
    try {
        stream = TikaInputStream.get(file, metadata);
        return detector.detect(stream, metadata).toString();
    } finally {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // Best-effort close: a failure here must not mask the
                // detection result or the original exception.
            }
        }
    }
}
Example 4
Source File: NodeTika.java From node-tika with MIT License | 6 votes |
/**
 * Parses the resource at {@code uri} and returns all collected metadata as a
 * JSON object mapping each metadata name to its array of values.
 *
 * @param uri         location of the resource to parse
 * @param contentType content-type hint passed to {@code fillMetadata}
 * @return JSON text produced by Gson from the name-to-values map
 * @throws Exception if the stream cannot be opened or parsing fails
 */
public static String extractMeta(String uri, String contentType) throws Exception {
    final AutoDetectParser parser = createParser();
    final Metadata metadata = new Metadata();

    fillMetadata(parser, metadata, contentType, uri);

    // try-with-resources releases the stream even when parsing throws.
    try (TikaInputStream inputStream = createInputStream(uri, metadata)) {
        parser.parse(inputStream, new DefaultHandler(), metadata);
    }

    final Map<String, String[]> meta = new HashMap<>();
    for (String name : metadata.names()) {
        meta.put(name, metadata.getValues(name));
    }

    return new Gson().toJson(meta);
}
Example 5
Source File: NodeTika.java From node-tika with MIT License | 6 votes |
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } return contentType; }
Example 6
Source File: NodeTika.java From node-tika with MIT License | 5 votes |
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); // Use metadata to provide type-hinting to the AutoDetectReader. fillMetadata(metadata, contentType, uri); // Detect the character set. final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata); String charset = reader.getCharset().toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } // Append the charset if the content-type was determined. if (charset != null && !charset.isEmpty()) { return contentType + "; charset=" + charset; } return contentType; }
Example 7
Source File: ExtractMediaMetadata.java From nifi with Apache License 2.0 | 4 votes |
private Map<String, String> tika_parse(InputStream sourceStream, String prefix, Integer maxAttribs, Integer maxAttribLen) throws IOException, TikaException, SAXException { final Metadata metadata = new Metadata(); final TikaInputStream tikaInputStream = TikaInputStream.get(sourceStream); try { autoDetectParser.parse(tikaInputStream, new DefaultHandler(), metadata); } finally { tikaInputStream.close(); } final Map<String, String> results = new HashMap<>(); final Pattern metadataKeyFilter = metadataKeyFilterRef.get(); final StringBuilder dataBuilder = new StringBuilder(); for (final String key : metadata.names()) { if (metadataKeyFilter != null && !metadataKeyFilter.matcher(key).matches()) { continue; } dataBuilder.setLength(0); if (metadata.isMultiValued(key)) { for (String val : metadata.getValues(key)) { if (dataBuilder.length() > 1) { dataBuilder.append(", "); } if (dataBuilder.length() + val.length() < maxAttribLen) { dataBuilder.append(val); } else { dataBuilder.append("..."); break; } } } else { dataBuilder.append(metadata.get(key)); } if (prefix == null) { results.put(key, dataBuilder.toString().trim()); } else { results.put(prefix + key, dataBuilder.toString().trim()); } // cutoff at max if provided if (maxAttribs != null && results.size() >= maxAttribs) { break; } } return results; }
Example 8
Source File: NodeTika.java From node-tika with MIT License | 4 votes |
public static String extractText(String uri, Map<String, Object> options) throws Exception { final AutoDetectParser parser = createParser(); final Metadata metadata = new Metadata(); final ParseContext context = new ParseContext(); String outputEncoding = null; String contentType = null; int maxLength = -1; if (options != null) { Object option; option = options.get("outputEncoding"); if (option != null) { outputEncoding = option.toString(); } option = options.get("contentType"); if (option != null) { contentType = option.toString(); } option = options.get("maxLength"); if (option != null) { maxLength = (int)Float.parseFloat(option.toString()); } } if (outputEncoding == null) { outputEncoding = "UTF-8"; } fillMetadata(parser, metadata, contentType, uri); fillParseContext(context, options); final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); final OutputStreamWriter writer = new OutputStreamWriter(outputStream, outputEncoding); final WriteOutContentHandler contentHandler = new WriteOutContentHandler(writer, maxLength); final TikaInputStream inputStream = createInputStream(uri, metadata); // Set up recursive parsing of archives. // See: http://wiki.apache.org/tika/RecursiveMetadata context.set(Parser.class, parser); context.set(EmbeddedDocumentExtractor.class, new ParsingEmbeddedDocumentExtractor(context)); try { parser.parse(inputStream, new BodyContentHandler(contentHandler), metadata, context); } catch (Throwable e) { if (!contentHandler.isWriteLimitReached(e)) { throw e; } else { writer.close(); } } finally { inputStream.close(); } return outputStream.toString(outputEncoding); }
Example 9
Source File: NodeTika.java From node-tika with MIT License | 4 votes |
public static String detectCharset(String uri, String contentType) throws FileNotFoundException, IOException, TikaException { final Metadata metadata = new Metadata(); // Use metadata to provide type-hinting to the AutoDetectReader. fillMetadata(metadata, contentType, uri); final TikaInputStream inputStream = createInputStream(uri, metadata); // Detect the character set. final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata); String charset = reader.getCharset().toString(); inputStream.close(); return charset; }