org.apache.tika.detect.Detector Java Examples
The following examples show how to use
org.apache.tika.detect.Detector.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TikaUtil.java From scipio-erp with Apache License 2.0 | 6 votes |
/**
 * Determines the media type of a stream via Apache Tika, using both the
 * supplied file name and the leading bytes of the content.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream}, which is always
 * closed before this method returns (closing {@code is} as well). A failure
 * while closing is deliberately ignored.
 *
 * @param is       stream to inspect
 * @param fileName resource name stored in the metadata handed to the detector
 * @return the media type reported by the detector
 * @throws IOException if the detector fails to read the stream
 */
public static MediaType findMediaType(InputStream is, String fileName) throws IOException {
    BufferedInputStream buffered = new BufferedInputStream(is);
    try {
        // The detector is taken from a freshly created AutoDetectParser.
        Detector tikaDetector = new AutoDetectParser().getDetector();

        Metadata hints = new Metadata();
        hints.add(Metadata.RESOURCE_NAME_KEY, fileName);

        return tikaDetector.detect(buffered, hints);
    } finally {
        try {
            buffered.close();
        } catch (IOException ignored) {
            // Best-effort close: a close failure must not mask the result.
        }
    }
}
Example #2
Source File: NodeTika.java From node-tika with MIT License | 6 votes |
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } return contentType; }
Example #3
Source File: TikaProcessor.java From quarkus with Apache License 2.0 | 5 votes |
/**
 * Build step that prepares the Tika parser configuration.
 *
 * <p>It resolves the supported parser configuration, renders it to a Tika XML
 * configuration string, produces service-provider build items for
 * {@code Parser}, {@code Detector} and {@code EncodingDetector}, and finally
 * hands the bean container, the configuration and the XML string to the
 * recorder.
 *
 * @param beanContainer   build item whose value is passed to the recorder
 * @param recorder        recorder used to initialise the Tika parser
 * @param serviceProvider producer receiving the service-provider build items
 * @param configuration   Tika configuration supplying parser settings
 * @throws Exception if any of the helper calls fails
 */
@BuildStep
@Record(ExecutionTime.STATIC_INIT)
void initializeTikaParser(BeanContainerBuildItem beanContainer, TikaRecorder recorder,
        BuildProducer<ServiceProviderBuildItem> serviceProvider, TikaConfiguration configuration)
        throws Exception {
    final Map<String, List<TikaParserParameter>> parserConfig = getSupportedParserConfig(
            configuration.tikaConfigPath,
            configuration.parsers,
            configuration.parserOptions,
            configuration.parser);
    final String xmlConfig = generateTikaXmlConfiguration(parserConfig);

    // Parsers: the providers are exactly the keys of the resolved configuration.
    final String parserService = Parser.class.getName();
    serviceProvider.produce(
            new ServiceProviderBuildItem(parserService, new ArrayList<>(parserConfig.keySet())));

    // Detectors and encoding detectors: providers are looked up by service name.
    final String detectorService = Detector.class.getName();
    serviceProvider.produce(
            new ServiceProviderBuildItem(detectorService, getProviderNames(detectorService)));

    final String encodingDetectorService = EncodingDetector.class.getName();
    serviceProvider.produce(
            new ServiceProviderBuildItem(encodingDetectorService, getProviderNames(encodingDetectorService)));

    recorder.initTikaParser(beanContainer.getValue(), configuration, xmlConfig);
}
Example #4
Source File: NodeTika.java From node-tika with MIT License | 5 votes |
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); // Use metadata to provide type-hinting to the AutoDetectReader. fillMetadata(metadata, contentType, uri); // Detect the character set. final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata); String charset = reader.getCharset().toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } // Append the charset if the content-type was determined. if (charset != null && !charset.isEmpty()) { return contentType + "; charset=" + charset; } return contentType; }
Example #5
Source File: TikaAnalysis.java From tutorials with MIT License | 5 votes |
/**
 * Detects the document type of a stream using a new {@code DefaultDetector}
 * and empty metadata.
 *
 * @param stream content to inspect
 * @return the string form of the detected media type
 * @throws IOException if the detector fails to read the stream
 */
public static String detectDocTypeUsingDetector(InputStream stream) throws IOException {
    // No hints are supplied: detection relies on an empty Metadata instance.
    return new DefaultDetector()
            .detect(stream, new Metadata())
            .toString();
}
Example #6
Source File: TikaContentExtractor.java From cxf with Apache License 2.0 | 4 votes |
/**
 * Creates a Tika-based content extractor backed by the given parsers and
 * detector.
 *
 * @param parsers  parser instances to store on this extractor
 * @param detector detector instance to store on this extractor
 */
public TikaContentExtractor(final List<Parser> parsers, Detector detector) {
    // Both references are stored as given; no copies are made.
    this.detector = detector;
    this.parsers = parsers;
}
Example #7
Source File: DetectMimeTypeBuilder.java From kite with Apache License 2.0 | 4 votes |
/**
 * Returns the detector held by this instance.
 *
 * @return the current value of the {@code detector} field
 */
protected Detector getDetector() {
    return this.detector;
}