Java Code Examples for org.apache.tika.metadata.Metadata#add()
The following examples show how to use
org.apache.tika.metadata.Metadata#add().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ExecUtil.java From ctsms with GNU Lesser General Public License v2.1 | 6 votes |
/**
 * Detects the MIME type of an in-memory payload.
 *
 * <p>The file name is recorded in the metadata under
 * {@code TikaMetadataKeys.RESOURCE_NAME_KEY} before the bytes are handed to
 * the {@code detector} in scope.
 *
 * @param data     raw content to inspect
 * @param fileName name stored as the resource name in the metadata
 * @return the string form of whatever {@code detector.detect} returns
 * @throws Throwable anything raised while opening the stream or detecting;
 *                   failures while closing the stream are not propagated
 */
public static String getMimeType(byte[] data, String fileName) throws Throwable {
    Metadata metadata = new Metadata();
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);

    TikaInputStream tikaStream = null;
    try {
        tikaStream = TikaInputStream.get(data, metadata);
        return detector.detect(tikaStream, metadata).toString();
    } finally {
        if (tikaStream != null) {
            try {
                tikaStream.close();
            } catch (IOException ignored) {
                // Closing is best effort; a close failure must not mask the result.
            }
        }
    }
}
Example 2
Source File: ExecUtil.java From ctsms with GNU Lesser General Public License v2.1 | 6 votes |
/**
 * Detects the MIME type of a file.
 *
 * <p>The file's simple name is recorded in the metadata under
 * {@code TikaMetadataKeys.RESOURCE_NAME_KEY} before the file is handed to
 * the {@code detector} in scope.
 *
 * @param file file to inspect
 * @return the string form of whatever {@code detector.detect} returns
 * @throws Throwable anything raised while opening the stream or detecting;
 *                   failures while closing the stream are not propagated
 */
public static String getMimeType(File file) throws Throwable {
    Metadata metadata = new Metadata();
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, file.getName());

    TikaInputStream tikaStream = null;
    try {
        tikaStream = TikaInputStream.get(file, metadata);
        return detector.detect(tikaStream, metadata).toString();
    } finally {
        if (tikaStream != null) {
            try {
                tikaStream.close();
            } catch (IOException ignored) {
                // Closing is best effort; a close failure must not mask the result.
            }
        }
    }
}
Example 3
Source File: TikaUtil.java From scipio-erp with Apache License 2.0 | 6 votes |
/**
 * Determines the media type of a stream using the detector obtained from a
 * fresh {@link AutoDetectParser}, with the file name supplied as a hint.
 *
 * <p>The given stream is wrapped in a {@link BufferedInputStream}, and that
 * wrapper is always closed before returning. A failure while closing is
 * ignored.
 *
 * @param is       content to inspect
 * @param fileName name stored under {@code Metadata.RESOURCE_NAME_KEY}
 * @return the media type reported by the detector
 * @throws IOException if detection fails while reading the stream
 */
public static MediaType findMediaType(InputStream is, String fileName) throws IOException {
    BufferedInputStream buffered = new BufferedInputStream(is);
    try {
        Detector detector = new AutoDetectParser().getDetector();
        Metadata hints = new Metadata();
        hints.add(Metadata.RESOURCE_NAME_KEY, fileName);
        return detector.detect(buffered, hints);
    } finally {
        try {
            buffered.close();
        } catch (IOException e) {
            // Deliberately ignored: a close failure must not replace the result.
        }
    }
}
Example 4
Source File: StoredFile.java From openmeetings with Apache License 2.0 | 6 votes |
/**
 * Populates {@code name}, {@code ext} and {@code mime} from the given inputs.
 *
 * <p>When an explicit extension is supplied, the name is taken verbatim and
 * the extension is lower-cased with {@link Locale#ROOT}. Otherwise the name is
 * the part of {@code inName} before its last dot (or all of it when there is
 * no dot), and the extension comes from {@code getFileExt(inName)}.
 *
 * <p>The MIME type is then detected from the stream, with the formatted file
 * name as a hint. Any failure during detection is logged and leaves
 * {@code mime} set to {@code null}.
 *
 * @param inName file name, possibly including an extension
 * @param inExt  explicit extension; may be empty
 * @param is     content stream; may be {@code null}
 */
private void init(String inName, String inExt, InputStream is) {
    if (!Strings.isEmpty(inExt)) {
        name = inName;
        ext = inExt.toLowerCase(Locale.ROOT);
    } else {
        final int lastDot = inName.lastIndexOf('.');
        if (lastDot < 0) {
            name = inName;
        } else {
            name = inName.substring(0, lastDot);
        }
        ext = getFileExt(inName);
    }

    final Metadata hints = new Metadata();
    hints.add(RESOURCE_NAME_KEY, String.format(FILE_NAME_FMT, name, ext));
    try {
        if (is == null) {
            mime = tika.getDetector().detect(null, hints);
        } else {
            mime = tika.getDetector().detect(TikaInputStream.get(is), hints);
        }
    } catch (Throwable e) {
        mime = null;
        log.error("Unexpected exception while detecting mime type", e);
    }
}
Example 5
Source File: NodeTika.java From node-tika with MIT License | 6 votes |
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } return contentType; }
Example 6
Source File: FileParser.java From scava with Eclipse Public License 2.0 | 5 votes |
/**
 * Reports whether the given file is supported, by delegating to the
 * stream-based {@code isSupported} overload with the file name as a hint.
 *
 * <p>Both streams are managed by try-with-resources, so they are closed even
 * if the delegated check throws. Previously they leaked on that path. The
 * close order is unchanged: the buffered wrapper first, then the file stream.
 *
 * @param file file to check
 * @return the result of the stream-based {@code isSupported} overload
 * @throws FileNotFoundException declared by the original signature
 * @throws IOException           if reading or closing a stream fails
 */
public static boolean isSupported(File file) throws FileNotFoundException, IOException {
    try (FileInputStream fis = fileToInputStream(file);
            BufferedInputStream bif = new BufferedInputStream(fis)) {
        Metadata metadata = new Metadata();
        metadata.add(Metadata.RESOURCE_NAME_KEY, file.getName());
        return isSupported(bif, metadata);
    }
}
Example 7
Source File: FileParser.java From scava with Eclipse Public License 2.0 | 5 votes |
/**
 * Extracts the text content of a file by delegating to the stream-based
 * {@code extractText} overload with the file name as a hint.
 *
 * <p>Both streams are managed by try-with-resources, so they are closed even
 * if extraction throws. Previously they leaked on that path. The close order
 * is unchanged: the buffered wrapper first, then the file stream.
 *
 * @param file file to extract text from
 * @return the extracted content; per the original documentation, null if the
 *         file is not supported
 * @throws Exception if opening, reading, extracting or closing fails
 */
public static FileContent extractText(File file) throws Exception {
    try (FileInputStream fis = fileToInputStream(file);
            BufferedInputStream bif = new BufferedInputStream(fis)) {
        Metadata metadata = new Metadata();
        metadata.add(Metadata.RESOURCE_NAME_KEY, file.getName());
        return extractText(bif, metadata);
    }
}
Example 8
Source File: ParseResultTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the fixed metadata fixture: two {@code "Author"} entries followed by
 * one {@code "Date"} entry.
 *
 * @return a new {@code Metadata} instance populated with the fixture values
 */
static Metadata getMetadata() {
    final Metadata fixture = new Metadata();

    // "Author" is added twice on purpose, with two different values.
    fixture.add("Author", "BeamTikaUser");
    fixture.add("Author", "BeamTikaUser2");

    fixture.add("Date", "2017-09-01");
    return fixture;
}
Example 9
Source File: TikaExtractor.java From ache with Apache License 2.0 | 5 votes |
/**
 * Creates a {@code Metadata} object carrying whichever of the two hints are
 * present.
 *
 * @param fileName    stored under {@code Metadata.RESOURCE_NAME_KEY} when
 *                    non-null
 * @param contentType stored under {@code Metadata.CONTENT_TYPE} when non-null
 * @return a new metadata instance, empty when both arguments are null
 */
private Metadata createMetadata(String fileName, String contentType) {
    final Metadata hints = new Metadata();

    final boolean hasFileName = fileName != null;
    if (hasFileName) {
        hints.add(Metadata.RESOURCE_NAME_KEY, fileName);
    }

    final boolean hasContentType = contentType != null;
    if (hasContentType) {
        hints.add(Metadata.CONTENT_TYPE, contentType);
    }

    return hints;
}
Example 10
Source File: FetchedResultTest.java From ache with Apache License 2.0 | 5 votes |
/** * Test method for {@link crawlercommons.fetcher.FetchedResult#report()}. * This does not actually test anything but simply allows us to see what a * generated report would look like. * @throws UnsupportedEncodingException */ @Test public void testPrintReport() throws UnsupportedEncodingException { Metadata headerMetadata = new Metadata(); headerMetadata.add(Metadata.CONTENT_DISPOSITION, "This is content disposition"); headerMetadata.add(Metadata.CONTENT_ENCODING, "This is the encoding"); headerMetadata.add(Metadata.CONTENT_LANGUAGE, "This is some language"); headerMetadata.add(Metadata.CONTENT_LENGTH, "This is the length"); Payload load = new Payload(); load.put("Item 1", 1234); load.put("Item 2", 5678); load.put("Item 3", 1357); load.put("Item 4", 2468); FetchedResult result = new FetchedResult // (, , , headers, content, contentType, responseRate, payload, // newBaseUrl, numRedirects, hostAddress, statusCode, reasonPhrase) ( "http://en.wikipedia.org/wiki/Glasgow", // baseUrl "http://en.wikipedia.org/wiki/Glasgow", // redirectedUrl System.currentTimeMillis(), // fetchTime headerMetadata, new String("Glasgow (/ˈɡlɑːzɡoʊ, ˈɡlæz-/;[4] Scots: Glesca; Scottish Gaelic: Glaschu) " + "is the largest city in Scotland, and the third largest in the United Kingdom.").getBytes("UTF-8"), "ScotsText", 2014, load, "http://en.wikipedia.org/wiki/Glasgow", 0, "wikipedia.org", 200, ""); LOG.error(result.report()); }
Example 11
Source File: ExcelHtmlParser.java From components with Apache License 2.0 | 5 votes |
/**
 * Parses an Excel-as-HTML document and returns the cell values collected by
 * a {@code SimpleValuesContentHandler}.
 *
 * <p>Parsing failures are logged at debug level and not rethrown. Whatever
 * the handler holds at that point is returned.
 *
 * @param rawContent the HTML content to parse
 * @param encoding   value stored under {@code Metadata.CONTENT_ENCODING}
 * @param limit      second constructor argument passed to the handler
 * @return the values reported by the handler's {@code getValues()}
 */
public static List<List<String>> getRows(InputStream rawContent, String encoding, long limit) {
    final SimpleValuesContentHandler collector = new SimpleValuesContentHandler(-1, limit);
    final HtmlParser parser = new HtmlParser();

    final Metadata hints = new Metadata();
    hints.add(Metadata.CONTENT_ENCODING, encoding);

    try {
        parser.parse(rawContent, collector, hints, new ParseContext());
    } catch (Exception e) {
        LOGGER.debug("Failed to parse the excel html format document.", e);
    }

    return collector.getValues();
}
Example 12
Source File: NodeTika.java From node-tika with MIT License | 5 votes |
private static void fillMetadata(Metadata metadata, String contentType, String uri) { // Set the file name. if (uri != null) { metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); } // Normalise the content-type. contentType = normalizeContentType(contentType); // Set the content-type. if (contentType != null) { metadata.add(HttpHeaders.CONTENT_TYPE, contentType); } }
Example 13
Source File: NodeTika.java From node-tika with MIT License | 5 votes |
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); // Use metadata to provide type-hinting to the AutoDetectReader. fillMetadata(metadata, contentType, uri); // Detect the character set. final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata); String charset = reader.getCharset().toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } // Append the charset if the content-type was determined. if (charset != null && !charset.isEmpty()) { return contentType + "; charset=" + charset; } return contentType; }
Example 14
Source File: TikaDetector.java From spring-boot-email-tools with Apache License 2.0 | 4 votes |
/**
 * Detects the media type of a stream and converts it to Spring's
 * {@code MediaType}.
 *
 * @param stream   content to inspect; rejected by {@code requireNonNull}
 *                 when null
 * @param fileName name stored under {@code RESOURCE_NAME_KEY} as a hint
 * @return the detected type, converted by {@code toSpringMediaType}
 * @throws IOException if detection fails while reading the stream
 */
private org.springframework.http.MediaType detect(final TikaInputStream stream, final String fileName) throws IOException {
    final Metadata hints = new Metadata();
    hints.add(RESOURCE_NAME_KEY, fileName);

    // The null check stays after the metadata setup, as in the original.
    return toSpringMediaType(detector.detect(requireNonNull(stream), hints));
}