org.apache.tika.sax.BodyContentHandler Java Examples
The following examples show how to use
org.apache.tika.sax.BodyContentHandler.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ContentExtractor.java From jate with GNU Lesser General Public License v3.0 | 6 votes |
private String parseTXTToString(InputStream stream, Metadata metadata) throws IOException, TikaException { WriteOutContentHandler handler = new WriteOutContentHandler(maxStringLength); try { ParseContext context = new ParseContext(); context.set(Parser.class, txtParser); txtParser.parse(stream, new BodyContentHandler(handler), metadata, context); } catch (SAXException e) { if (!handler.isWriteLimitReached(e)) { // This should never happen with BodyContentHandler... throw new TikaException("Unexpected SAX processing failure", e); } } finally { stream.close(); } return handler.toString(); }
Example #2
Source File: TikaExtractor.java From ache with Apache License 2.0 | 6 votes |
/**
 * Parses the stream with Tika + Boilerpipe and returns the extracted text
 * together with a copy of every metadata entry.
 *
 * @return the parsed data, or {@code null} if extraction fails
 */
public ParsedData parse(InputStream stream, String fileName, String contentType) {
    final BodyContentHandler bodyHandler = new BodyContentHandler(MAX_CHARACTERS);
    // KeepEverythingExtractor: Boilerpipe is used only as a text pipeline here.
    final BoilerpipeContentHandler boilerpipeHandler =
            new BoilerpipeContentHandler(bodyHandler, KeepEverythingExtractor.INSTANCE);
    final Metadata metadata = createMetadata(fileName, contentType);
    final ParseContext parseContext = new ParseContext();
    try {
        parser.parse(stream, boilerpipeHandler, metadata, parseContext);
        final Map<String, String> metadataMap = new HashMap<String, String>();
        for (final String name : metadata.names()) {
            metadataMap.put(name, metadata.get(name));
        }
        return new ParsedData(bodyHandler.toString(), metadataMap);
    } catch (IOException | SAXException | TikaException e) {
        logger.error("Failed to extract metadata using Tika.", e);
        return null;
    }
}
Example #3
Source File: TikaContentExtractor.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Extracts text and metadata from the stream with Tika auto-detection and
 * stores both on the CAS. On parse failure the document text falls back to
 * {@code CORRUPT_FILE_TEXT} when nothing was extracted.
 */
@Override
public void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException {
    super.doProcessStream(stream, source, jCas);
    try {
        final BodyContentHandler textHandler = new BodyContentHandler(Integer.MAX_VALUE);
        final Metadata metadata = new Metadata();
        final AutoDetectParser autoParser = new AutoDetectParser();
        autoParser.parse(stream, textHandler, metadata, new ParseContext());

        jCas.setDocumentText(textHandler.toString());
        for (final String key : metadata.names()) {
            addMetadata(jCas, key, metadata.get(key));
        }
    } catch (SAXException | TikaException e) {
        getMonitor().warn("Couldn't parse metadata from '{}'", source, e);
        // Only substitute the placeholder when no text was recovered at all.
        if (Strings.isNullOrEmpty(jCas.getDocumentText())) {
            jCas.setDocumentText(CORRUPT_FILE_TEXT);
        }
    }
}
Example #4
Source File: FTConnector.java From openprodoc with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs Tika auto-detection over the supplied stream, storing the document's
 * metadata (one {@code key=value} line per entry) in {@code FileMetadata} and
 * its body text in {@code FullText}.
 *
 * @param Bytes document content to convert
 * @return the extracted full text
 * @throws PDException wrapping any extraction failure
 */
protected String Convert(InputStream Bytes) throws PDException {
    try {
        ContentHandler textHandler = new BodyContentHandler(-1); // -1 = no write limit
        Metadata metadata = new Metadata();
        Parser parser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        parser.parse(Bytes, textHandler, metadata, context);
        StringBuilder metaLines = new StringBuilder();
        for (String key : metadata.names()) {
            metaLines.append(key).append("=").append(metadata.get(key)).append("\n");
        }
        FileMetadata = metaLines.toString();
        FullText = textHandler.toString();
    } catch (Exception ex) {
        PDException.GenPDException("Error_extracting_content_from_doc", ex.getLocalizedMessage());
    }
    return (FullText);
}
Example #5
Source File: EmbedSpawner.java From extract with MIT License | 6 votes |
@Override public void parseEmbedded(final InputStream input, final ContentHandler handler, final Metadata metadata, final boolean outputHtml) throws SAXException, IOException { // There's no need to spawn inline embeds, like images in PDFs. These should be concatenated to the main // document as usual. if (TikaCoreProperties.EmbeddedResourceType.INLINE.toString().equals(metadata .get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE))) { final ContentHandler embedHandler = new EmbeddedContentHandler(new BodyContentHandler(handler)); if (outputHtml) { writeStart(handler, metadata); } delegateParsing(input, embedHandler, metadata); if (outputHtml) { writeEnd(handler); } } else { try (final TikaInputStream tis = TikaInputStream.get(input)) { spawnEmbedded(tis, metadata); } } }
Example #6
Source File: EmbedParser.java From extract with MIT License | 5 votes |
/**
 * Parses an embedded document inline, delegating to the wrapped parser and
 * optionally surrounding the output with start/end markup.
 */
@Override
public void parseEmbedded(final InputStream input, final ContentHandler handler, final Metadata metadata,
        final boolean outputHtml) throws SAXException, IOException {
    if (outputHtml) {
        writeStart(handler, metadata);
    }
    // BodyContentHandler strips html/body wrappers from the embedded content.
    final ContentHandler embedHandler = new EmbeddedContentHandler(new BodyContentHandler(handler));
    delegateParsing(input, embedHandler, metadata);
    if (outputHtml) {
        writeEnd(handler);
    }
}
Example #7
Source File: TikaAnalysis.java From tutorials with MIT License | 5 votes |
/**
 * Extracts document metadata from a stream via Tika's auto-detect parser.
 * (The method name carries a historical typo; it is kept for API stability.)
 *
 * @param stream document bytes to inspect; not closed by this method
 * @return the populated metadata
 */
public static Metadata extractMetadatatUsingParser(InputStream stream)
        throws IOException, SAXException, TikaException {
    final Metadata extracted = new Metadata();
    final Parser autoParser = new AutoDetectParser();
    autoParser.parse(stream, new BodyContentHandler(), extracted, new ParseContext());
    return extracted;
}
Example #8
Source File: TikaAnalysis.java From tutorials with MIT License | 5 votes |
/**
 * Extracts the plain-text body of a document stream via Tika's auto-detect parser.
 *
 * @param stream document bytes to read; not closed by this method
 * @return the extracted body text
 */
public static String extractContentUsingParser(InputStream stream)
        throws IOException, TikaException, SAXException {
    final ContentHandler textHandler = new BodyContentHandler();
    final Parser autoParser = new AutoDetectParser();
    autoParser.parse(stream, textHandler, new Metadata(), new ParseContext());
    return textHandler.toString();
}
Example #9
Source File: AttachAttribute.java From entando-components with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Builds the value indexed for this attachment attribute: the superclass's
 * indexable value plus the full text Tika can extract from the attached
 * resource's stream.
 */
@Override
public String getIndexeableFieldValue() {
    StringBuilder buffer = new StringBuilder();
    if (null != super.getIndexeableFieldValue()) {
        buffer.append(super.getIndexeableFieldValue());
    }
    String extraValue = null;
    ResourceInterface resource = this.getResource();
    if (resource != null) {
        InputStream is = ((AttachResource) resource).getResourceStream();
        if (null != is) {
            AutoDetectParser parser = new AutoDetectParser();
            // -1 disables BodyContentHandler's write limit (full document text).
            BodyContentHandler handler = new BodyContentHandler(-1);
            Metadata metadata = new Metadata();
            try {
                parser.parse(is, handler, metadata);
                extraValue = handler.toString();
            } catch (Throwable t) {
                // Broad catch is deliberate: indexing must survive any parser failure.
                _logger.error("Error while processing the parsing", t);
            } finally {
                try {
                    is.close();
                } catch (IOException ex) {
                    _logger.error("Error closing stream", ex);
                }
            }
        }
    }
    if (null != extraValue) {
        // Separate the extracted text from the base value with a single space.
        buffer.append(" ").append(extraValue);
    }
    return buffer.toString();
}
Example #10
Source File: JATEUtil.java From jate with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Extracts plain text from the given stream using Tika auto-detection.
 *
 * @param fileStream document bytes to read; not closed by this method
 * @return the extracted text, or an empty string when extraction fails
 */
public static String parseToPlainText(InputStream fileStream) {
    final BodyContentHandler textHandler = new BodyContentHandler();
    final AutoDetectParser autoParser = new AutoDetectParser();
    String plainText = "";
    try {
        autoParser.parse(fileStream, textHandler, new Metadata());
        plainText = textHandler.toString();
    } catch (IOException | SAXException | TikaException e) {
        LOG.debug("Parsing Exception while extracting content from current file. " + e.toString());
    }
    return plainText;
}
Example #11
Source File: TearlineContentExtractor.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Extracts text with Tika, truncates it at the first tearline match (if any),
 * removes boilerplate, and stores the result plus all metadata on the CAS.
 */
@Override
public void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException {
    super.doProcessStream(stream, source, jCas);
    try {
        final BodyContentHandler textHandler = new BodyContentHandler(Integer.MAX_VALUE);
        final Metadata metadata = new Metadata();
        final AutoDetectParser autoParser = new AutoDetectParser();
        autoParser.parse(stream, textHandler, metadata, new ParseContext());

        final String fullContent = textHandler.toString();
        final Matcher tearline = tearlinePattern.matcher(fullContent);
        // Keep only the text above the tearline when one is present.
        final String kept = tearline.find() ? fullContent.substring(0, tearline.start()) : fullContent;
        jCas.setDocumentText(removeBoilerplate(kept).trim());

        for (final String name : metadata.names()) {
            addMetadata(jCas, name, metadata.get(name));
        }
    } catch (SAXException | TikaException e) {
        getMonitor().warn("Couldn't parse metadata from '{}'", source, e);
    }
}
Example #12
Source File: PdfParser.java From superword with Apache License 2.0 | 5 votes |
/**
 * Parses a PDF file into plain text.
 *
 * @param file relative or absolute path of the local PDF file
 * @return the extracted text, or an empty string on failure
 */
public static String parsePdfFileToPlainText(String file) {
    try (InputStream stream = new FileInputStream(file)) {
        final BodyContentHandler textHandler = new BodyContentHandler(Integer.MAX_VALUE);
        PARSER.parse(stream, textHandler, new Metadata());
        return textHandler.toString();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return "";
}
Example #13
Source File: EmbeddedDocumentMemoryExtractor.java From extract with MIT License | 5 votes |
/**
 * Parses the root document and extracts the embedded document whose digest
 * matches {@code embeddedDocumentDigest}.
 *
 * @param rootDocument           document whose embedded resources are searched
 * @param embeddedDocumentDigest digest identifying the target embedded document
 * @return the matching embedded document's content and metadata
 * @throws IOException   on stream failure
 * @throws SAXException  on content-handler failure
 * @throws TikaException on parser failure
 */
public TikaDocumentSource extract(final TikaDocument rootDocument, final String embeddedDocumentDigest)
        throws SAXException, TikaException, IOException {
    ParseContext context = new ParseContext();
    ContentHandler handler = new BodyContentHandler(-1); // -1 = no write limit
    context.set(Parser.class, parser);

    DigestEmbeddedDocumentExtractor extractor = new DigestEmbeddedDocumentExtractor(
            rootDocument, embeddedDocumentDigest, context, digester, algorithm);
    context.set(org.apache.tika.extractor.EmbeddedDocumentExtractor.class, extractor);

    // Fix: the original opened this FileInputStream without ever closing it,
    // leaking a file descriptor on both success and failure.
    try (InputStream input = new FileInputStream(rootDocument.getPath().toFile())) {
        parser.parse(input, handler, rootDocument.getMetadata(), context);
    }
    return extractor.getDocument();
}
Example #14
Source File: TikaTest.java From tika-server with Apache License 2.0 | 5 votes |
/**
 * Basic text extraction, capped at 1,000,000 characters.
 * <p>
 * Closes the input stream after processing.
 */
public String getText(InputStream is, Parser parser, ParseContext context, Metadata metadata) throws Exception {
    final ContentHandler textHandler = new BodyContentHandler(1000000);
    try {
        parser.parse(is, textHandler, metadata, context);
    } finally {
        is.close();
    }
    return textHandler.toString();
}
Example #15
Source File: WidgetMacroLibraryTests.java From scipio-erp with Apache License 2.0 | 5 votes |
/**
 * Renders the "Fop" screen over HTTP, verifies the response is a PDF, then
 * extracts its text with Tika's PDFParser and asserts no FreeMarker macro
 * error leaked into the rendered output.
 */
public void testFopMacroLibrary() throws Exception {
    String screentextUrl = screenUrl.concat("Fop");
    HttpClient http = initHttpClient();
    http.setUrl(screentextUrl.concat(authentificationQuery));
    //FIXME need to check if the stream is an application-pdf that don't contains ftl stack trace
    InputStream screenInputStream = (InputStream) http.postStream();
    assertNotNull("Response failed from ofbiz", screenInputStream);
    assertEquals("Response contentType isn't good : " + http.getResponseContentType(),
            "application/pdf;charset=UTF-8", http.getResponseContentType());
    String screenOutString = "";
    try {
        // Integer.MAX_VALUE disables the default 100k character write limit.
        BodyContentHandler handler = new BodyContentHandler(Integer.MAX_VALUE);
        Metadata metadata = new Metadata();
        new PDFParser().parse(screenInputStream, handler, metadata, new ParseContext());
        screenOutString = handler.toString();
    } finally {
        screenInputStream.close();
    }
    //Test if a ftl macro error is present in the rendered PDF text
    assertFalse("Fop Screen contains Macro on error : see " + screentextUrl + " for more detail",
            screenOutString.contains("FreeMarker template error:"));
}
Example #16
Source File: PDFPreprocessorParserTest.java From CogStack-Pipeline with Apache License 2.0 | 5 votes |
@Test public void testParseRequiringNotRequiringOCR() throws Exception { System.out.println("parse"); InputStream stream = getClass().getClassLoader().getResourceAsStream("tika/testdocs/pdf_nonOCR_test.pdf"); AutoDetectParser parser = new AutoDetectParser(config); //AutoDetectParser parser = new AutoDetectParser(); BodyContentHandler body = new BodyContentHandler(); Metadata metadata = new Metadata(); try { parser.parse(stream, body, metadata); } finally { stream.close(); } assertTrue(body.toString().contains("An Example Paper")); }
Example #17
Source File: PDFPreprocessorParserTest.java From CogStack-Pipeline with Apache License 2.0 | 5 votes |
@Test public void testEncryptedWordDoc() throws Exception { System.out.println("testEncryptedWordDoc"); InputStream stream = getClass().getClassLoader().getResourceAsStream("encryptedWordDocx.docx"); AutoDetectParser parser = new AutoDetectParser(config); //PDFPreprocessorParser parser = new PDFPreprocessorParser(); BodyContentHandler body = new BodyContentHandler(); Metadata metadata = new Metadata(); try { parser.parse(stream, body, metadata); } catch (Exception ex) { //donowt } assertFalse(body.toString().contains("Word doc Encrypted")); }
Example #18
Source File: PDFPreprocessorParserTest.java From CogStack-Pipeline with Apache License 2.0 | 5 votes |
@Test public void testEncryptedPDFDoc() throws Exception { System.out.println("testEncryptedPDFDoc"); InputStream stream = getClass().getClassLoader().getResourceAsStream("pdf_encrypted_test.pdf"); AutoDetectParser parser = new AutoDetectParser(config); //PDFPreprocessorParser parser = new PDFPreprocessorParser(); BodyContentHandler body = new BodyContentHandler(); Metadata metadata = new Metadata(); try { parser.parse(stream, body, metadata); } catch (Exception ex) { //donowt } assertFalse(body.toString().contains("PDF Encrypted")); }
Example #19
Source File: PDFPreprocessorParserTest.java From CogStack-Pipeline with Apache License 2.0 | 5 votes |
@Ignore @Test public void testMassiveOCRDoc() throws Exception { System.out.println("testMassiveOCRDoc"); InputStream stream = getClass().getClassLoader().getResourceAsStream("long_OCR_doc.pdf"); AutoDetectParser parser = new AutoDetectParser(config); //PDFPreprocessorParser parser = new PDFPreprocessorParser(); BodyContentHandler body = new BodyContentHandler(); Metadata metadata = new Metadata(); parser.parse(stream, body, metadata); assertTrue(body.toString().contains("Saliva-derived genomic DNA samples were genotyped using")); }
Example #20
Source File: PDFPreprocessorParserTest.java From CogStack-Pipeline with Apache License 2.0 | 5 votes |
@Test public void testParseRequiringOCR() throws Exception { System.out.println("parse"); InputStream stream = getClass().getClassLoader().getResourceAsStream("tika/testdocs/pdf_ocr_test.pdf"); AutoDetectParser parser = new AutoDetectParser(config); //PDFPreprocessorParser parser = new PDFPreprocessorParser(); BodyContentHandler body = new BodyContentHandler(); Metadata metadata = new Metadata(); parser.parse(stream, body, metadata); String parsedString = body.toString(); // From first page assertTrue(parsedString.contains("Father or mother")); // From second (last) page assertTrue(parsedString.contains("how you have determined who is the Nearest")); }
Example #21
Source File: TikaPoweredContentTransformer.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Returns an appropriate Tika ContentHandler for the
 * requested content type. Normally you'll let this
 * work as default, but if you need fine-grained
 * control of how the Tika events become text then
 * override and supply your own.
 *
 * @param targetMimeType the mimetype the transformation should produce
 * @param output         where the transformed content is written
 * @throws TransformerConfigurationException if the SAX transformer cannot be built
 */
protected ContentHandler getContentHandler(String targetMimeType, Writer output) throws TransformerConfigurationException {
    // Plain text: BodyContentHandler streams body text straight to the writer.
    if(MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimeType)) {
        return new BodyContentHandler(output);
    }
    // All other targets serialize the SAX events through a TransformerHandler.
    SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    TransformerHandler handler = factory.newTransformerHandler();
    handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
    handler.setResult(new StreamResult(output));
    if(MimetypeMap.MIMETYPE_HTML.equals(targetMimeType)) {
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
        // ExpandedTitleContentHandler works around empty <title/> rendering issues.
        return new ExpandedTitleContentHandler(handler);
    } else if(MimetypeMap.MIMETYPE_XHTML.equals(targetMimeType) || MimetypeMap.MIMETYPE_XML.equals(targetMimeType)) {
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
    } else {
        // Unsupported target mimetype: surface a transformer configuration error.
        throw new TransformerInfoException(
                WRONG_FORMAT_MESSAGE_ID,
                new IllegalArgumentException("Requested target type " + targetMimeType + " not supported")
        );
    }
    return handler;
}
Example #22
Source File: TikaAutoInterpreter.java From db with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Reads the file at {@code filePath} with Tika auto-detection and returns a
 * JSON object containing the extracted text under {@code "_txt"} plus every
 * metadata entry as a top-level key.
 *
 * @throws OperationException if the document cannot be read or yields no text
 */
@Override
public JSONObject toJson(String filePath) throws OperationException {
    final AutoDetectParser autoParser = new AutoDetectParser();
    final BodyContentHandler textHandler = new BodyContentHandler();
    final Metadata metadata = new Metadata();
    try (InputStream stream = new FileInputStream(new File(filePath))) {
        autoParser.parse(stream, textHandler, metadata);
    } catch (IOException | SAXException | TikaException e) {
        throw new OperationException(ErrorCode.UNRECOGNISED_DOCUMENT_FORMAT, "Could not auto-detect document for reading");
    }

    final String fileText = textHandler.toString();
    if (fileText == null || fileText.isEmpty()) {
        throw new OperationException(ErrorCode.UNRECOGNISED_DOCUMENT_FORMAT, "Attempting to import an empty document");
    }

    final JSONObject result = new JSONObject();
    result.put("_txt", fileText);
    for (final String name : metadata.names()) {
        result.put(name, metadata.get(name));
    }
    return result;
}
Example #23
Source File: TikaAutoInterpreter.java From db with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Reads the file at {@code filePath} with Tika auto-detection and returns its
 * plain-text content.
 *
 * @throws OperationException if the document cannot be read
 */
@Override
public String toText(String filePath) throws OperationException {
    final AutoDetectParser autoParser = new AutoDetectParser();
    final BodyContentHandler textHandler = new BodyContentHandler();
    try (InputStream stream = new FileInputStream(new File(filePath))) {
        autoParser.parse(stream, textHandler, new Metadata());
        return textHandler.toString();
    } catch (IOException | SAXException | TikaException e) {
        throw new OperationException(ErrorCode.UNRECOGNISED_DOCUMENT_FORMAT, "Could not auto-detect document for reading");
    }
}
Example #24
Source File: ParsingReader.java From extract with MIT License | 4 votes |
/**
 * Creates a reader over the text produced by parsing {@code input}, using
 * {@link BodyContentHandler} as the handler factory so only body text
 * (no markup) is exposed through the reader.
 *
 * @throws IOException NOTE(review): presumably propagated from the delegated
 *         constructor's stream setup — confirm against that constructor
 */
public ParsingReader(final Parser parser, final InputStream input, final Metadata metadata, final ParseContext context) throws IOException {
    this(parser, input, metadata, context, BodyContentHandler::new);
}
Example #25
Source File: TikaEntityProcessor.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Builds a plain-text content handler that streams body text to the writer.
 */
private static ContentHandler getTextContentHandler(Writer writer) {
    final ContentHandler textHandler = new BodyContentHandler(writer);
    return textHandler;
}
Example #26
Source File: DocUtils.java From geoportal-server-harvester with Apache License 2.0 | 4 votes |
/**
 * Generates Dublin Core metadata XML for an arbitrary document by running it
 * through Tika and mapping the extracted properties onto the harvester's
 * well-known attributes (title, description, modified date).
 *
 * @param file_bytes raw document content
 * @param file_name  original file name, used as the fallback title
 * @return UTF-8 encoded XML bytes, or {@code null} if extraction failed
 * @throws IOException declared for stream handling; body failures are caught and logged
 */
public static byte[] generateMetadataXML(byte[] file_bytes, String file_name) throws IOException {
    // Input & Output Variables
    ByteArrayInputStream base_input = new ByteArrayInputStream(file_bytes);
    byte[] xml_bytes = null;
    // Tika Parser Objects
    Parser parser = new AutoDetectParser();
    BodyContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    ParseContext context = new ParseContext();
    try {
        // Populate Metadata Object with Tika Parser
        parser.parse(base_input, handler, metadata, context);
        // Container & Writer for Metadata
        Properties meta_props = new Properties();
        StringWriter sw = new StringWriter();
        // Put non-empty Tika metadata entries into Properties
        for(String name : metadata.names()) {
            if (!metadata.get(name).isEmpty()) {
                meta_props.put(name, metadata.get(name));
            }
        }
        // Serialized properties are used as the fallback description below.
        meta_props.store(sw, "Tika Values");
        // Expected Harvester Properties
        String meta_descr = metadata.get(TikaCoreProperties.DESCRIPTION);
        String meta_modif = metadata.get(TikaCoreProperties.MODIFIED);
        String meta_title = metadata.get(TikaCoreProperties.TITLE);
        // Default Label for Undefined Tika Properties, e.g. "TIKA_2024/01/31"
        DateFormat date_format = new SimpleDateFormat("yyyy/MM/dd");
        Date date = new Date();
        String date_today = date_format.format(date);
        String tika_label = String.format("TIKA_%s", date_today);
        // Check For Null Values & Set Defaults
        if (meta_descr == null) {
            // Fallback: dump of all extracted properties as the description.
            meta_props.put(WKAConstants.WKA_DESCRIPTION, "" + sw.toString());
        } else {
            meta_props.put(WKAConstants.WKA_DESCRIPTION, meta_descr);
        }
        if (meta_modif == null) {
            meta_props.put(WKAConstants.WKA_MODIFIED, tika_label);
        } else {
            meta_props.put(WKAConstants.WKA_MODIFIED, meta_modif);
        }
        if (meta_title == null) {
            // Fallback: use the file name when the document has no title.
            meta_props.put(WKAConstants.WKA_TITLE, file_name);
        } else {
            meta_props.put(WKAConstants.WKA_TITLE, meta_title);
        }
        // Build XML as Bytes
        MapAttribute attr = AttributeUtils.fromProperties(meta_props);
        Document document = new SimpleDcMetaBuilder().create(attr);
        xml_bytes = XmlUtils.toString(document).getBytes("UTF-8");
    } catch (Exception ex) {
        LOG.error(String.format("Error reading data."), ex);
    } finally {
        base_input.close();
    }
    return xml_bytes;
}
Example #27
Source File: PDFPreprocessorParser.java From CogStack-Pipeline with Apache License 2.0 | 4 votes |
/**
 * Parses a PDF, falling back to OCR (ImageMagick + Tesseract) when the
 * embedded text layer is essentially empty. Sets the
 * {@code X-PDFPREPROC-OCR-APPLIED} metadata key to NA, FAIL or SUCCESS to
 * record which path was taken.
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    ImageMagickConfig config = context.get(ImageMagickConfig.class, DEFAULT_IMAGEMAGICK_CONFIG);
    // If ImageMagick is not on the path with the current config, do not try to run OCR
    // getSupportedTypes shouldn't have listed us as handling it, so this should only
    // occur if someone directly calls this parser, not via DefaultParser or similar
    // TemporaryResources tmp = new TemporaryResources();
    //TikaInputStream pdfStream = TikaInputStream.get(stream);
    PDFParser pdfParser = new PDFParser();
    //create temp handlers to investigate object
    BodyContentHandler body = new BodyContentHandler();
    Metadata pdfMetadata = new Metadata();
    //needed to reset stream after the probe parse below
    if (stream.markSupported()) {
        stream.mark(Integer.MAX_VALUE);
    }
    //first do initial parse to see if there's subsantial content in pdf metadata already
    pdfParser.parse(stream, body, pdfMetadata, context);
    stream.reset();
    // If there's real text content (>100 chars) — or OCR isn't even possible —
    // reparse with the official handlers/metadata and skip OCR entirely.
    if (body.toString().length() > 100 || !hasImageMagick(config)) {
        pdfParser.parse(stream, handler, metadata, context);
        metadata.set("X-PDFPREPROC-OCR-APPLIED", "NA");
        return;
    }
    metadata.set("X-PDFPREPROC-ORIGINAL", body.toString());
    metadata.set("X-PDFPREPROC-OCR-APPLIED", "FAIL"); // "FAIL" will be overwritten if it succeeds later
    //add the PDF metadata to the official metadata object
    Arrays.asList(pdfMetadata.names()).stream().forEach(name -> {
        metadata.add(name, pdfMetadata.get(name));
    });
    //objects to hold file references for manipulation outside of Java
    File tiffFileOfPDF = null;
    File pdfFileFromStream = File.createTempFile("tempPDF", ".pdf");
    try {
        FileUtils.copyInputStreamToFile(stream, pdfFileFromStream);
        tiffFileOfPDF = File.createTempFile("tempTIFF", ".tiff");
        // Render PDF pages to TIFF via ImageMagick, then OCR the TIFF.
        makeTiffFromPDF(pdfFileFromStream,tiffFileOfPDF, config);
        if (tiffFileOfPDF.exists()) {
            long tessStartTime = System.currentTimeMillis();
            TesseractOCRParser tesseract = new TesseractOCRParser();
            tesseract.parse(FileUtils.openInputStream(tiffFileOfPDF), handler, metadata, context);
            metadata.set("X-PDFPREPROC-OCR-APPLIED", "SUCCESS");
            LOG.debug("Document parsing -- OCR processing time: {} ms", System.currentTimeMillis() - tessStartTime);
        }
    } finally {
        // Always clean up the temp files, whether OCR succeeded or not.
        if (tiffFileOfPDF.exists()) {
            tiffFileOfPDF.delete();
        }
        if (pdfFileFromStream.exists()) {
            pdfFileFromStream.delete();
        }
    }
}
Example #28
Source File: NodeTika.java From node-tika with MIT License | 4 votes |
/**
 * Extracts text from the document at {@code uri}, honouring optional settings
 * in {@code options}: {@code outputEncoding} (default UTF-8),
 * {@code contentType} (hint for detection) and {@code maxLength} (character
 * cap; -1 = unlimited). Archives are parsed recursively.
 *
 * @return the extracted text in the requested encoding
 * @throws Exception on any extraction failure other than hitting the length cap
 */
public static String extractText(String uri, Map<String, Object> options) throws Exception {
    final AutoDetectParser parser = createParser();
    final Metadata metadata = new Metadata();
    final ParseContext context = new ParseContext();
    String outputEncoding = null;
    String contentType = null;
    int maxLength = -1; // -1 disables the write limit
    if (options != null) {
        Object option;
        option = options.get("outputEncoding");
        if (option != null) {
            outputEncoding = option.toString();
        }
        option = options.get("contentType");
        if (option != null) {
            contentType = option.toString();
        }
        option = options.get("maxLength");
        if (option != null) {
            // Parsed via Float to tolerate numeric strings like "100.0".
            maxLength = (int)Float.parseFloat(option.toString());
        }
    }
    if (outputEncoding == null) {
        outputEncoding = "UTF-8";
    }
    fillMetadata(parser, metadata, contentType, uri);
    fillParseContext(context, options);
    final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    final OutputStreamWriter writer = new OutputStreamWriter(outputStream, outputEncoding);
    final WriteOutContentHandler contentHandler = new WriteOutContentHandler(writer, maxLength);
    final TikaInputStream inputStream = createInputStream(uri, metadata);
    // Set up recursive parsing of archives.
    // See: http://wiki.apache.org/tika/RecursiveMetadata
    context.set(Parser.class, parser);
    context.set(EmbeddedDocumentExtractor.class, new ParsingEmbeddedDocumentExtractor(context));
    try {
        parser.parse(inputStream, new BodyContentHandler(contentHandler), metadata, context);
    } catch (Throwable e) {
        // Hitting the write limit is expected and non-fatal; anything else propagates.
        if (!contentHandler.isWriteLimitReached(e)) {
            throw e;
        } else {
            writer.close();
        }
    } finally {
        inputStream.close();
    }
    return outputStream.toString(outputEncoding);
}
Example #29
Source File: MP3Reader.java From red5-io with Apache License 2.0 | 4 votes |
/**
 * Creates reader from file input stream. Parses the MP3's ID3 metadata with
 * Tika's Mp3Parser, maps the recognised XMP-DM fields onto this reader's
 * {@code MetaData} holder, then prepares the file channel, keyframe analysis
 * and the initial metadata tag for streaming.
 *
 * @param file
 *            file input
 * @throws IOException
 *             on IO error
 */
public MP3Reader(File file) throws IOException {
    this.file = file;
    fis = new FileInputStream(file);
    try {
        // parse the ID3 info
        BodyContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        // MP3 parser
        Mp3Parser parser = new Mp3Parser();
        parser.parse(fis, handler, metadata, null);
        log.debug("Contents of the document: {}", handler.toString());
        // create meta data holder
        metaData = new MetaData();
        String val = null;
        String[] metadataNames = metadata.names();
        // Map each recognised Tika/XMP-DM property onto the MetaData holder.
        for (String name : metadataNames) {
            val = metadata.get(name);
            log.debug("Meta name: {} value: {}", name, val);
            if ("xmpDM:artist".equals(name)) {
                metaData.setArtist(val);
            } else if ("xmpDM:album".equals(name)) {
                metaData.setAlbum(val);
            } else if ("title".equals(name)) {
                metaData.setSongName(val);
            } else if ("xmpDM:genre".equals(name)) {
                metaData.setGenre(val);
            } else if ("xmpDM:logComment".equals(name)) {
                metaData.setComment(val);
            } else if ("xmpDM:trackNumber".equals(name)) {
                metaData.setTrack(val);
            } else if ("xmpDM:releaseDate".equals(name)) {
                metaData.setYear(val);
            } else if ("xmpDM:duration".equals(name) || "duration".equals(name)) {
                metaData.setDuration(val);
            } else if ("xmpDM:audioSampleRate".equals(name) || "samplerate".equals(name)) {
                metaData.setSampleRate(val);
            } else if ("channels".equals(name)) {
                metaData.setChannels(val);
            }
        }
        // NOTE: a legacy block that extracted embedded album art (APIC frame)
        // and queued an onImageData metadata tag was disabled here; see VCS
        // history if it needs to be restored.
    } catch (Exception e) {
        log.error("MP3Reader {}", e);
    }
    // ensure we have a valid sample rate
    checkValidHeader();
    // get the total bytes / file size
    fileSize = file.length();
    log.debug("File size: {}", fileSize);
    // analyze keyframes data
    analyzeKeyFrames();
    // create file metadata object
    firstTags.addFirst(createFileMeta());
    log.trace("File input stream - open: {} position: {}", fis.getChannel().isOpen(), fis.getChannel().position());
    // create a channel for reading
    fileChannel = fis.getChannel();
}