org.apache.tika.detect.DefaultDetector Java Examples
The following examples show how to use
org.apache.tika.detect.DefaultDetector.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TikaPoweredContainerExtractor.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
/** * Injects the TikaConfig to use * * @param tikaConfig The Tika Config to use */ public void setTikaConfig(TikaConfig tikaConfig) { this.config = tikaConfig; // Setup the detector and parser detector = new DefaultDetector(config.getMimeRepository()); parser = new AutoDetectParser(detector); }
Example #2
Source File: MimeTypeUtils.java From oodt with Apache License 2.0 | 5 votes |
/**
 * Builds the MIME type registry from the given stream and creates a
 * {@code Tika} facade backed by a {@code DefaultDetector} over that registry.
 *
 * <p>Any exception raised while loading is logged at {@code SEVERE} and not
 * rethrown, so construction always completes; fields assigned after the
 * failing statement keep their default values.
 *
 * @param mimeIs stream the MIME type definitions are read from
 * @param magic  value stored in {@code mimeMagic}
 *               (presumably toggles magic-byte detection; confirm against its readers)
 */
public MimeTypeUtils(InputStream mimeIs, boolean magic) {
    try {
        // Order matters: if the registry fails to load, nothing below is assigned.
        this.mimeTypes = MimeTypesFactory.create(mimeIs);
        this.mimeMagic = magic;
        this.tika = new Tika(new DefaultDetector(this.mimeTypes));
    } catch (Exception e) {
        String message = "Failed to load MimeType Registry : " + e.getMessage();
        LOG.log(Level.SEVERE, message, e);
    }
}
Example #3
Source File: TikaAnalysis.java From tutorials with MIT License | 5 votes |
/**
 * Detects the media type of the given stream using Tika's
 * {@code DefaultDetector} and returns it as a string.
 *
 * <p>Tika's {@code Detector#detect} contract expects a stream that supports
 * mark/reset so that it can peek at the leading bytes and rewind. A stream
 * without that support (for example a raw {@code FileInputStream}) is
 * therefore wrapped in a {@code BufferedInputStream} before detection, the
 * same way the {@code Tika} facade handles it. In that case the wrapper may
 * read ahead, so the caller's original stream position is not guaranteed to
 * be preserved.
 *
 * @param stream the document stream; passed through unchanged when it is
 *               {@code null} or already supports mark/reset
 * @return the string form of the detected media type
 * @throws IOException if the stream cannot be read
 */
public static String detectDocTypeUsingDetector(InputStream stream) throws IOException {
    // Fully-qualified name keeps this block independent of the file's import list.
    InputStream markable = stream;
    if (stream != null && !stream.markSupported()) {
        markable = new java.io.BufferedInputStream(stream);
    }

    Detector detector = new DefaultDetector();
    Metadata metadata = new Metadata();
    MediaType mediaType = detector.detect(markable, metadata);
    return mediaType.toString();
}
Example #4
Source File: MimetypeMap.java From alfresco-data-model with GNU Lesser General Public License v3.0 | 4 votes |
/** * Initialises the map using the configuration service provided */ public void init() { PropertyCheck.mandatory(this, "configService", configService); PropertyCheck.mandatory(this, "contentCharsetFinder", contentCharsetFinder); // Do we have any properties that indicate we will read JSON? if (mimetypeJsonConfigDir != null || jsonObjectMapper != null || cronExpression != null || initialAndOnErrorCronExpression != null) { PropertyCheck.mandatory(this, "jsonObjectMapper", jsonObjectMapper); // If we have a cronExpression it indicates that we will schedule reading. if (cronExpression != null) { PropertyCheck.mandatory(this, "initialAndOnErrorCronExpression", initialAndOnErrorCronExpression); } jsonConfigFileFinder = new ConfigFileFinder(jsonObjectMapper) { @Override protected void readJson(JsonNode jsonNode, String readFromMessage, String baseUrl) throws IOException { try { JsonNode mediaTypes = jsonNode.get("mediaTypes"); if (mediaTypes != null && mediaTypes.isArray()) { List<ConfigElement> mimetypes = new ArrayList<>(); for (JsonNode mediaType : mediaTypes) { MediaTypeDef def = jsonObjectMapper.convertValue(mediaType, MediaTypeDef.class); GenericConfigElement mimetype = new GenericConfigElement(ATTR_MIMETYPE); mimetype.addAttribute(ATTR_DISPLAY, def.name); mimetype.addAttribute(ATTR_MIMETYPE, def.mediaType); if (def.text) { mimetype.addAttribute(ATTR_TEXT, Boolean.TRUE.toString()); } GenericConfigElement ext = null; int count = 0; for (ExtensionDef extension : def.extensions) { ext = new GenericConfigElement(ATTR_EXTENSION); ext.setValue(extension.extension); if (extension.name != null && !extension.name.isBlank()) { ext.addAttribute(ATTR_DISPLAY, extension.name); } if (extension.isDefault) { ext.addAttribute(ATTR_DEFAULT, Boolean.TRUE.toString()); } mimetype.addChild(ext); count++; } if (count == 1 && ext.getAttribute(ATTR_DEFAULT) == null) { ext.addAttribute(ATTR_DEFAULT, Boolean.TRUE.toString()); } mimetypes.add(mimetype); } registerMimetypes(mimetypes); Data 
data = getData(); data.fileCount++; } } catch (IllegalArgumentException e) { logger.error("Error reading "+readFromMessage+" "+e.getMessage()); } } }; } // TikaConfig should be given, but work around it if not if (tikaConfig == null) { logger.warn("TikaConfig spring parameter not supplied, using default config"); setTikaConfig(TikaConfig.getDefaultConfig()); } // Create our Tika mimetype detector up-front // We can then be sure we only have the one, so it's quick (ALF-10813) detector = new DefaultDetector(tikaConfig.getMimeRepository()); // Work out the mappings - only runs once and straight away if cronExpression is null configScheduler.run(true, logger, cronExpression, initialAndOnErrorCronExpression); }
Example #5
Source File: AssetMimeHandler.java From usergrid with Apache License 2.0 | 4 votes |
/**
 * Creates the handler with a {@code DefaultDetector} built from its
 * no-argument constructor.
 */
AssetMimeHandler() {
    this.detector = new DefaultDetector();
}
Example #6
Source File: TikaContentExtractor.java From cxf with Apache License 2.0 | 2 votes |
/**
 * Creates a Tika-based content extractor over the supplied parsers.
 *
 * <p>Delegates to the two-argument constructor, passing a freshly created
 * {@code DefaultDetector} as the detector.
 *
 * @param parsers parser instances
 */
public TikaContentExtractor(final List<Parser> parsers) {
    this(
        parsers,
        new DefaultDetector()
    );
}
Example #7
Source File: TikaContentExtractor.java From cxf with Apache License 2.0 | 2 votes |
/**
 * Creates a Tika-based content extractor around a single parser, with
 * optional media type validation.
 *
 * <p>When validation is enabled, the extractor tries to detect the media type
 * of the input and validate it against the media types supported by the
 * parser. This constructor expresses that choice by passing a new
 * {@code DefaultDetector} when validation is enabled and {@code null} when it
 * is not.
 *
 * @param parser parser instance
 * @param validateMediaType {@code true} to enable media type validation,
 *                          {@code false} to disable it
 */
public TikaContentExtractor(final Parser parser, final boolean validateMediaType) {
    this(
        Collections.singletonList(parser),
        validateMediaType ? new DefaultDetector() : null
    );
}