org.apache.tika.mime.MimeType Java Examples

The following examples show how to use org.apache.tika.mime.MimeType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TikaUtil.java    From scipio-erp with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the registered Tika mime-type for a media type name, yielding {@code null}
 * rather than propagating a {@link MimeTypeException}.
 * <p>
 * WARN: only explicitly defined (canonical) mime-types are returned, NOT aliases.
 * When {@code exact} is true the result is additionally verified against the requested
 * name, parameters included (MimeTypes.getRegisteredMimeType does not guarantee this).
 * FIXME: the exact check doesn't handle differences in parameter order.
 *
 * @param mediaType the media type name to look up
 * @param mimeTypes the Tika registry to query
 * @param exact whether a match whose id differs from the requested id is rejected
 * @return the registered mime-type; {@code null} if none is registered, if the exact
 *         check fails, or if the lookup throws {@link MimeTypeException}
 */
public static MimeType getMimeTypeForMediaTypeSafe(String mediaType, MimeTypes mimeTypes, boolean exact) {
    try {
        MimeType registered = mimeTypes.getRegisteredMimeType(mediaType);
        if (registered == null || !exact) {
            return registered;
        }
        // NOTE: getRegisteredMimeType may hand back a non-null result even when the name
        // is not an exact match, because of parameters.
        // FIXME: this comparison won't cope with a different parameter order;
        // also check whether another normalize call would be more appropriate...
        boolean sameId = getMimeTypeId(mediaType).equals(getMimeTypeId(registered.getName()));
        return sameId ? registered : null;
    } catch (MimeTypeException e) {
        return null;
    }
}
 
Example #2
Source File: TikaUtil.java    From scipio-erp with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves a description for a media type, falling back to its supertypes.
 * <p>
 * The description of the exactly matching Tika mime-type is preferred. When there is
 * no such mime-type (a warning is logged) or its description is empty, the supertype
 * reported by the media type registry is tried recursively and its description is
 * returned with a {@code " (sub-type)"} suffix.
 *
 * @param mediaType the media type to describe
 * @param mimeTypes the Tika registry to query
 * @return a non-empty description, or {@code null} if none could be found
 */
public static String getMediaTypeDescriptionOrNullSafe(MediaType mediaType, MimeTypes mimeTypes) {
    MimeType mimeType = getMimeTypeForMediaTypeSafe(mediaType, mimeTypes, true);

    String description;
    if (mimeType == null) {
        // when this prints, it's because of imperfections in tika-mimetypes.xml...
        Debug.logWarning("No Tika mime-type for MediaType: " + mediaType.toString(), module);
        description = null;
    } else {
        description = mimeType.getDescription();
    }
    if (UtilValidate.isNotEmpty(description)) {
        return description;
    }

    // nothing usable on the type itself: see whether the supertype has a description
    MediaType superType = mimeTypes.getMediaTypeRegistry().getSupertype(mediaType);
    if (superType == null) {
        return null;
    }
    String inherited = getMediaTypeDescriptionOrNullSafe(superType, mimeTypes);
    return UtilValidate.isNotEmpty(inherited) ? inherited + " (sub-type)" : null;
}
 
Example #3
Source File: MediaUtils.java    From desktopclient-java with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Maps a MIME type name to a file extension without the leading dot.
 *
 * @param mimeType the MIME type name; an empty string yields {@code DEFAULT_EXT}
 * @return the extension Tika's default registry reports for the type, stripped of its
 *         first character (the dot), or {@code DEFAULT_EXT} when the lookup fails or
 *         yields no extension
 */
public static String extensionForMIME(String mimeType) {
    if (mimeType.isEmpty()) {
        return DEFAULT_EXT;
    }

    MimeType resolved;
    try {
        resolved = MimeTypes.getDefaultMimeTypes().forName(mimeType);
    } catch (MimeTypeException ex) {
        LOGGER.log(Level.WARNING, "can't find mimetype", ex);
        resolved = null;
    }

    String dotted = (resolved == null) ? "" : resolved.getExtension();
    // drop the leading dot when there is an extension at all
    String bare = dotted.isEmpty() ? dotted : dotted.substring(1);
    return StringUtils.defaultIfEmpty(bare, DEFAULT_EXT);
}
 
Example #4
Source File: ReferenceResource.java    From oodt with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the name of the MIME type attached to the wrapped reference.
 * @return the MIME type name, or null when the reference has no MIME type
 */
@XmlElement(name = "mimeType")
public String getMimeTypeName()
{
  MimeType type = reference.getMimeType();
  return type == null ? null : type.getName();
}
 
Example #5
Source File: IdentifyMimeType.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Detects the MIME type of the next FlowFile's content and stores it, together with
 * the matching file extension, as FlowFile attributes before routing to success.
 * <p>
 * The FlowFile's filename attribute, when present, is passed to the detector as a hint.
 * If no MIME type was recorded the attributes fall back to
 * {@code application/octet-stream} and an empty extension.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();
    final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
    final AtomicReference<String> detectedRef = new AtomicReference<>(null);

    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream stream) throws IOException {
            try (final InputStream buffered = new BufferedInputStream(stream)) {
                TikaInputStream tikaIn = TikaInputStream.get(buffered);
                Metadata hints = new Metadata();
                // Pass the filename along when the FlowFile carries one
                if (filename != null) {
                    hints.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                }
                // Run the actual detection
                MediaType detected = detector.detect(tikaIn, hints);
                detectedRef.set(detected.toString());
            }
        }
    });

    final String mimeType = detectedRef.get();
    String extension = "";
    try {
        extension = config.getMimeRepository().forName(mimeType).getExtension();
    } catch (MimeTypeException ex) {
        logger.warn("MIME type extension lookup failed: {}", new Object[]{ex});
    }

    // Workaround for bug in Tika - https://issues.apache.org/jira/browse/TIKA-1563
    if ("application/gzip".equals(mimeType) && extension.equals(".tgz")) {
        extension = ".gz";
    }

    final boolean identified = mimeType != null;
    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(),
            identified ? mimeType : "application/octet-stream");
    flowFile = session.putAttribute(flowFile, "mime.extension", identified ? extension : "");
    if (identified) {
        logger.info("Identified {} as having MIME Type {}", new Object[]{flowFile, mimeType});
    } else {
        logger.info("Unable to identify MIME Type for {}; setting to application/octet-stream", new Object[]{flowFile});
    }

    session.getProvenanceReporter().modifyAttributes(flowFile);
    session.transfer(flowFile, REL_SUCCESS);
}
 
Example #6
Source File: MimeUtil.java    From anthelion with Apache License 2.0 4 votes vote down vote up
/**
 * A facade interface to trying all the possible mime type resolution
 * strategies available within Tika. First, the mime type provided in
 * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
 * Then the cleaned mime type is looked up in the underlying Tika
 * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType} is
 * found, then that mime type is used, otherwise URL resolution is
 * used to try and determine the mime type. If that means is unsuccessful, and
 * if <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
 * then mime type magic resolution is used to try and obtain a
 * better-than-the-default approximation of the {@link MimeType}.
 * If every strategy comes up empty, {@link MimeTypes#OCTET_STREAM} is
 * returned, whether or not magic resolution is enabled.
 * 
 * @param typeName
 *          The original mime type, returned from a {@link ProtocolOutput}.
 * @param url
 *          The URL that Nutch was trying to crawl.
 * @param data
 *          The byte data, returned from the crawl, if any. When
 *          <code>null</code>, magic resolution is skipped.
 * @return The automatically guessed {@link MimeType} name; never
 *         <code>null</code>.
 */
public String autoResolveContentType(String typeName, String url, byte[] data) {
  String cleanedMimeType = null;
  try {
    // clean once, then canonicalise the name through the registry
    String cleaned = MimeUtil.cleanMimeType(typeName);
    if (cleaned != null) {
      cleanedMimeType = this.mimeTypes.forName(cleaned).getName();
    }
  } catch (MimeTypeException mte) {
    // Seems to be a malformed mime type name; leave cleanedMimeType null so
    // that URL (and possibly magic) resolution takes over below
  }

  // first try to get the type from the cleaned type name
  MimeType type = null;
  if (cleanedMimeType != null) {
    try {
      type = this.mimeTypes.forName(cleanedMimeType);
    } catch (MimeTypeException e) {
      type = null;
    }
  }

  // if returned null, or if it's the default type then try url resolution
  if (type == null || type.getName().equals(MimeTypes.OCTET_STREAM)) {
    // If no mime-type header, or cannot find a corresponding registered
    // mime-type, then guess a mime-type from the url pattern
    MimeType urlType = this.mimeTypes.getMimeType(url);
    if (urlType != null) {
      type = urlType;
    }
  }

  // type may still be null here if neither the header nor the url resolved
  String retType = type != null ? type.getName() : null;

  // if magic is enabled use mime magic to guess if the mime type returned
  // from the magic guess is different than the one that's already set so far
  // if it is, and it's not the default mime type, then go with the mime type
  // returned by the magic
  if (this.mimeMagic && data != null) {
    String magicType = tika.detect(data);

    // Deprecated in Tika 1.0 See https://issues.apache.org/jira/browse/NUTCH-1230
    //MimeType magicType = this.mimeTypes.getMimeType(data);
    if (magicType != null && !magicType.equals(MimeTypes.OCTET_STREAM)
        && !magicType.equals(MimeTypes.PLAIN_TEXT)
        && !magicType.equals(retType)) {

      // If magic enabled and the current mime type differs from that of the
      // one returned from the magic, take the magic mimeType
      retType = magicType;
    }
  }

  // if type is STILL null after all the resolution strategies, go for the
  // default type
  if (retType == null) {
    retType = MimeTypes.OCTET_STREAM;
  }

  return retType;
}
 
Example #7
Source File: IdentifyMimeType.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Detects the MIME type of the next FlowFile's content and stores it, together with
 * the matching file extension, as FlowFile attributes before routing to success.
 * <p>
 * The FlowFile's filename attribute is passed to the detector as a hint only when it
 * is present and the {@code USE_FILENAME_IN_DETECTION} property evaluates to true.
 * If no MIME type was recorded the attributes fall back to
 * {@code application/octet-stream} and an empty extension.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();
    final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
    final AtomicReference<String> detectedRef = new AtomicReference<>(null);

    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream stream) throws IOException {
            try (final InputStream buffered = new BufferedInputStream(stream)) {
                TikaInputStream tikaIn = TikaInputStream.get(buffered);
                Metadata hints = new Metadata();

                // The filename is only a hint, and only when the processor is configured to use it
                if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) {
                    hints.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                }
                // Run the actual detection
                MediaType detected = detector.detect(tikaIn, hints);
                detectedRef.set(detected.toString());
            }
        }
    });

    final String mimeType = detectedRef.get();
    String extension = "";
    try {
        extension = mimeTypes.forName(mimeType).getExtension();
    } catch (MimeTypeException ex) {
        logger.warn("MIME type extension lookup failed: {}", new Object[]{ex});
    }

    // Workaround for bug in Tika - https://issues.apache.org/jira/browse/TIKA-1563
    if ("application/gzip".equals(mimeType) && extension.equals(".tgz")) {
        extension = ".gz";
    }

    final boolean identified = mimeType != null;
    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(),
            identified ? mimeType : "application/octet-stream");
    flowFile = session.putAttribute(flowFile, "mime.extension", identified ? extension : "");
    if (identified) {
        logger.info("Identified {} as having MIME Type {}", new Object[]{flowFile, mimeType});
    } else {
        logger.info("Unable to identify MIME Type for {}; setting to application/octet-stream", new Object[]{flowFile});
    }

    session.getProvenanceReporter().modifyAttributes(flowFile);
    session.transfer(flowFile, REL_SUCCESS);
}
 
Example #8
Source File: ReferenceResource.java    From oodt with Apache License 2.0 4 votes vote down vote up
/**
 * Exposes the MIME type held by the wrapped reference.
 * @return whatever the wrapped reference reports as its MIME type
 */
public MimeType getMimeType()
{
  MimeType type = reference.getMimeType();
  return type;
}
 
Example #9
Source File: SikuliService.java    From cerberus-source with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds the JSON parameters for an action expected to be done by Sikuli.
 * <p>
 * When {@code locator} is a non-empty URL, the resource behind it is downloaded,
 * its MIME type is guessed from the leading bytes to derive a file extension, and
 * the full content is Base64 (URL-safe) encoded. The download stream is closed
 * before returning, including when an exception is thrown.
 *
 * @param action      action expected to be done by Sikuli
 * @param locator     URL of the picture; {@code null} or empty means no picture
 * @param text        text to type
 * @param defaultWait timeout for the action
 * @return JSON object with the keys {@code action}, {@code picture}, {@code text},
 *         {@code defaultWait} and {@code pictureExtension}
 * @throws JSONException         if a value cannot be put into the JSON object
 * @throws MalformedURLException if {@code locator} is not a valid URL
 * @throws IOException           if the picture cannot be opened or read
 * @throws MimeTypeException     if the guessed MIME type name is rejected by Tika
 */
private JSONObject generatePostParameters(String action, String locator, String text, long defaultWait) throws JSONException, IOException, MalformedURLException, MimeTypeException {
    JSONObject result = new JSONObject();
    String picture = "";
    String extension = "";
    /**
     * Get Picture from URL and convert to Base64
     */
    if (locator != null && !"".equals(locator)) {
        URL url = new URL(locator);
        URLConnection connection = url.openConnection();

        // try-with-resources: the connection stream was previously never closed
        try (InputStream istream = new BufferedInputStream(connection.getInputStream())) {
            /**
             * Get the MimeType and the extension
             */
            // NOTE(review): guessContentTypeFromStream returns null for unrecognised
            // content; confirm that forName(null) surfaces as MimeTypeException.
            String mimeType = URLConnection.guessContentTypeFromStream(istream);
            MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
            MimeType mt = allTypes.forName(mimeType);
            extension = mt.getExtension();

            /**
             * Encode in Base64
             */
            byte[] bytes = IOUtils.toByteArray(istream);
            picture = Base64.encodeBase64URLSafeString(bytes);
        }
    }
    /**
     * Build JSONObject with parameters action : Action expected to be done
     * by Sikuli picture : Picture in Base64 format text : Text to type
     * defaultWait : Timeout for the action pictureExtension : Extension for
     * Base64 decoding
     */
    result.put("action", action);
    result.put("picture", picture);
    result.put("text", text);
    result.put("defaultWait", defaultWait);
    result.put("pictureExtension", extension);
//        result.put("minSimilarity", parameterService.getParameterStringByKey("cerberus_sikuli_minSimilarity", "", null));
    return result;
}
 
Example #10
Source File: Reference.java    From oodt with Apache License 2.0 3 votes vote down vote up
/**
 * <p>
 * Creates a Reference from explicitly supplied values, including the
 * MimeType object that represents the mime-type of the referenced item.
 * </p>
 * 
 * @param origRef
 *            The item's original location.
 * @param dataRef
 *            The item's location within the data store.
 * @param size
 *            The size of the file that this reference refers to.
 * @param mime
 *            A MimeType object representing the mime-type of the item
 */
public Reference(String origRef, String dataRef, long size, MimeType mime) {
    this.origReference = origRef;
    this.dataStoreReference = dataRef;
    this.fileSize = size;
    this.mimeType = mime;
}
 
Example #11
Source File: TikaUtil.java    From scipio-erp with Apache License 2.0 2 votes vote down vote up
/**
 * Convenience overload that takes a {@link MediaType} and delegates to the
 * String-based lookup using the media type's string form.
 * WARN: this only returns explicit defined mime-types (canonical), NOT aliases.
 * FIXME: exact doesn't handle parameter order.
 *
 * @param mediaType the media type to look up
 * @param mimeTypes the Tika registry to query
 * @param exact passed through unchanged to the String-based overload
 * @return the result of the String-based overload
 */
public static MimeType getMimeTypeForMediaTypeSafe(MediaType mediaType, MimeTypes mimeTypes, boolean exact) {
    String mediaTypeName = mediaType.toString();
    return getMimeTypeForMediaTypeSafe(mediaTypeName, mimeTypes, exact);
}
 
Example #12
Source File: Reference.java    From oodt with Apache License 2.0 2 votes vote down vote up
/**
 * @return the MimeType object currently stored on this reference,
 *         representing its mime-type
 */
public MimeType getMimeType() {
    return this.mimeType;
}
 
Example #13
Source File: Reference.java    From oodt with Apache License 2.0 2 votes vote down vote up
/**
 * Replaces the MimeType object stored on this reference.
 *
 * @param mime
 *            the MimeType object to store
 */
public void setMimeType(MimeType mime) {
    mimeType = mime;
}