org.apache.poi.openxml4j.opc.PackagePart Java Examples

The following examples show how to use org.apache.poi.openxml4j.opc.PackagePart. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SimpleReadCacheSelector.java    From easyexcel with Apache License 2.0 6 votes vote down vote up
/**
 * Chooses a cache implementation from the size of the shared strings part.
 *
 * <p>When the part reports a negative size, the number of bytes available on its input stream is
 * used instead. A size below {@code maxUseMapCacheSize * B2M} selects a {@link MapCache}; any other
 * size selects an {@link Ehcache} built with {@code maxCacheActivateSize}.
 *
 * @param sharedStringsTablePackagePart the package part whose size drives the choice
 * @return a new {@link MapCache} or {@link Ehcache}; a {@link MapCache} when the size cannot be read
 */
@Override
public ReadCache readCache(PackagePart sharedStringsTablePackagePart) {
    long size = sharedStringsTablePackagePart.getSize();
    if (size < 0) {
        // The stream is opened only to probe its length, so it must be closed again right away.
        try (java.io.InputStream probe = sharedStringsTablePackagePart.getInputStream()) {
            size = probe.available();
        } catch (IOException e) {
            LOGGER.warn("Unable to get file size, default used MapCache", e);
            return new MapCache();
        }
    }
    if (size < maxUseMapCacheSize * B2M) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Use map cache.size:{}", size);
        }
        return new MapCache();
    }
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Use ehcache.size:{}", size);
    }
    return new Ehcache(maxCacheActivateSize);
}
 
Example #2
Source File: RawCopier.java    From M2Doc with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Creates, for the output body, the relation matching a relation of the input body.
 * 
 * <p>
 * When the input relation targets an internal part, that part is fetched from the input package and handed to
 * {@code getOrCopyPart}. Otherwise the target URI is recorded in the part mapping as mapping to itself and no
 * output part is used.
 * </p>
 * 
 * @param inputRelationIdToOutputMap
 *            the relation ID mapping, updated with the input ID and the resulting output ID
 * @param inputPartURIToOutputPartURI
 *            the mapping from input part {@link PackagePartName} to output part {@link PackagePartName}
 * @param inputRelationID
 *            the input relation ID
 * @param inputBody
 *            the input {@link IBody}
 * @param outputBody
 *            the output {@link IBody}
 * @return the new relation ID
 * @throws InvalidFormatException
 *             if image copy fails
 * @throws IOException
 *             if a {@link PackagePart} can't be read
 * @throws NoSuchAlgorithmException
 *             if MD5 can't be found
 */
private String createRelation(Map<String, String> inputRelationIdToOutputMap,
        Map<URI, URI> inputPartURIToOutputPartURI, String inputRelationID, IBody inputBody, IBody outputBody)
        throws InvalidFormatException, NoSuchAlgorithmException, IOException {
    final PackageRelationship inputRelationship = inputBody.getPart().getPackagePart()
            .getRelationship(inputRelationID);

    PackagePart outputPart = null;
    if (inputRelationship.getTargetMode() != TargetMode.INTERNAL) {
        // Nothing to copy for a non-internal target: it maps to itself.
        inputPartURIToOutputPartURI.put(inputRelationship.getTargetURI(), inputRelationship.getTargetURI());
    } else {
        final PackagePart source = inputBody.getXWPFDocument().getPackagePart().getPackage()
                .getPart(PackagingURIHelper.createPartName(inputRelationship.getTargetURI()));
        outputPart = getOrCopyPart(inputPartURIToOutputPartURI, source, outputBody.getXWPFDocument());
    }

    final String outputRelationID = getOrCreateRelationship(inputPartURIToOutputPartURI, outputBody, outputPart,
            inputRelationship).getId();
    inputRelationIdToOutputMap.put(inputRelationship.getId(), outputRelationID);

    return outputRelationID;
}
 
Example #3
Source File: OOXMLTagHelper.java    From DocBleach with MIT License 6 votes vote down vote up
/**
 * Reads a part as UTF-8 text.
 *
 * <p>The part is read line by line and the lines are concatenated without their line terminators.
 *
 * @param part The {@link org.apache.poi.openxml4j.opc.PackagePart PackagePart} the content must
 * be read from
 * @return The concatenated lines of the part, or {@code null} when reading fails (the failure is
 * logged).
 */
private static String readPartContent(PackagePart part) {
  final StringBuilder content = new StringBuilder();
  try (InputStream in = part.getInputStream()) {
    final BufferedReader lines =
        new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));

    for (String current = lines.readLine(); current != null; current = lines.readLine()) {
      content.append(current);
    }

    return content.toString();
  } catch (IOException ex) {
    LOGGER.error(
        "Error while bleaching {}. The file may be corrupted :/",
        part.getPartName().getName(),
        ex);
    return null;
  }
}
 
Example #4
Source File: RawCopier.java    From M2Doc with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Gets the output {@link PackagePart} name for the given source {@link PackagePart} in the given output {@link XWPFDocument}.
 * 
 * <p>
 * The source name is used as is when no part with that name exists in the output package. Otherwise candidate
 * names are built with an increasing index, starting at 1, until a free one is found:
 * </p>
 * <ul>
 * <li>when the source name matches {@code INTEGER_PATTERN}, the first match is replaced by the index;</li>
 * <li>otherwise, when the name contains a {@code '.'}, the index is inserted before each {@code '.'};</li>
 * <li>otherwise the index is appended to the name, so the search always makes progress.</li>
 * </ul>
 * 
 * @param source
 *            the source {@link PackagePart}
 * @param outputDoc
 *            the output {@link XWPFDocument}
 * @return the output {@link PackagePart} name for the given source {@link PackagePart} in the given output {@link XWPFDocument}
 * @throws InvalidFormatException
 *             if a {@link PackagePart} can't be accessed
 */
private PackagePartName getOutputPartName(PackagePart source, XWPFDocument outputDoc)
        throws InvalidFormatException {
    final String sourceName = source.getPartName().getName();
    final Matcher matcher = INTEGER_PATTERN.matcher(sourceName);
    final boolean indexFound = matcher.find();

    PackagePartName possiblePartName = source.getPartName();
    int index = 1;
    // One lookup per candidate: keep going while the candidate name is already taken.
    while (outputDoc.getPackage().getPart(possiblePartName) != null) {
        final String candidateName;
        if (indexFound) {
            candidateName = matcher.replaceFirst(String.valueOf(index));
        } else if (sourceName.indexOf('.') >= 0) {
            candidateName = sourceName.replace(".", index + ".");
        } else {
            // Without a '.' the replacement above would leave the name unchanged and loop forever.
            candidateName = sourceName + index;
        }
        index++;
        possiblePartName = PackagingURIHelper.createPartName(candidateName);
    }

    return possiblePartName;
}
 
Example #5
Source File: OOXMLBleach.java    From DocBleach with MIT License 6 votes vote down vote up
/**
 * Checks the content type of a part and removes the part when the type is forbidden or strange.
 *
 * <p>A removed part is reported to the session as a high severity active content threat.
 *
 * @param session The bleach session the threat is recorded in
 * @param pkg The package the part is deleted from
 * @param part The part to inspect
 */
void sanitize(BleachSession session, OPCPackage pkg, PackagePart part) {
  LOGGER.trace("Part name: {}", part.getPartName());

  final String rawContentType = part.getContentType();
  LOGGER.debug("Content type: {} for part {}", rawContentType, part.getPartName());

  // Sample content types:
  // vnd.ms-word.vbaData+xml, vnd.ms-office.vbaProject
  // cf https://msdn.microsoft.com/fr-fr/library/aa338205(v=office.12).aspx
  final ContentType details = part.getContentTypeDetails();
  final boolean suspicious = isForbiddenType(details) || isStrangeContentType(details);
  if (!suspicious) {
    return;
  }

  LOGGER.debug(SUSPICIOUS_OOXML_FORMAT, rawContentType, part.getPartName(), part.getSize());
  deletePart(pkg, part.getPartName());

  session.recordThreat(
      Threat.builder()
          .type(ThreatType.ACTIVE_CONTENT)
          .severity(ThreatSeverity.HIGH)
          .action(ThreatAction.REMOVE)
          .location(part.getPartName().getName())
          .details("Forbidden content type: " + details)
          .build());
}
 
Example #6
Source File: OOXMLBleach.java    From DocBleach with MIT License 6 votes vote down vote up
/**
 * Replaces the content type of a part when {@code REMAPPED_CONTENT_TYPES} has an entry for it.
 *
 * <p>Parts whose content type is not a key of that map are left untouched. A remapped part is
 * reported to the session as a low severity, disarmed, unrecognized content threat.
 *
 * @param session The bleach session the threat is recorded in
 * @param part The part whose content type may be replaced
 * @throws InvalidFormatException propagated from {@code part.setContentType}
 */
void remapContentType(BleachSession session, PackagePart part) throws InvalidFormatException {
  final String previousType = part.getContentType();
  if (REMAPPED_CONTENT_TYPES.containsKey(previousType)) {
    final String replacementType = REMAPPED_CONTENT_TYPES.get(part.getContentType());
    part.setContentType(replacementType);

    LOGGER.debug(
        "Content type of '{}' changed from '{}' to '{}'",
        part.getPartName(),
        previousType,
        replacementType);

    session.recordThreat(
        Threat.builder()
            .type(ThreatType.UNRECOGNIZED_CONTENT)
            .severity(ThreatSeverity.LOW)
            .action(ThreatAction.DISARM)
            .location(part.getPartName().getName())
            .details("Remapped content type: " + previousType)
            .build());
  }
}
 
Example #7
Source File: RawCopier.java    From M2Doc with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Copies the source {@link PackagePart} into the given output {@link XWPFDocument}, reusing an already copied part
 * with the same MD5 digest when there is one.
 * 
 * <p>
 * The content is always copied first, its MD5 digest being computed on the fly. When the digest is already
 * registered for the output document, the fresh copy is removed again and the registered part is returned;
 * otherwise the copy is registered under its digest.
 * </p>
 * 
 * @param source
 *            the source {@link PackagePart}
 * @param outputDoc
 *            the output {@link XWPFDocument}
 * @return the copied {@link PackagePart}, or the existing one with the same digest
 * @throws InvalidFormatException
 *             if the {@link PackagePart} can't be accessed
 * @throws NoSuchAlgorithmException
 *             if MD5 can't be found
 * @throws IOException
 *             if a {@link PackagePart} can't be read
 */
private PackagePart copyPart(PackagePart source, XWPFDocument outputDoc)
        throws InvalidFormatException, NoSuchAlgorithmException, IOException {
    final PackagePartName outputPartName = getOutputPartName(source, outputDoc);
    final PackagePart copiedPart = outputDoc.getPackage().createPart(outputPartName, source.getContentType());

    final MessageDigest md5 = MessageDigest.getInstance("MD5");
    try (InputStream sourceStream = source.getInputStream();
            DigestInputStream digestStream = new DigestInputStream(sourceStream, md5);
            OutputStream copyStream = copiedPart.getOutputStream()) {
        IOUtil.copyCompletely(digestStream, copyStream);
    }

    // Text form of the digest, used as the key of the per document digest registry.
    final StringBuilder hex = new StringBuilder();
    for (byte digestByte : md5.digest()) {
        hex.append(Integer.toString((digestByte & MASK_0XFF) + MASK_0X100, MASK_16).substring(1));
    }
    final String checksum = hex.toString();

    final URI knownPartURI = getPartMD5ToName(outputDoc).get(checksum);
    if (knownPartURI == null) {
        getPartMD5ToName(outputDoc).put(checksum, copiedPart.getPartName().getURI());
        return copiedPart;
    }

    final PackagePart knownPart = outputDoc.getPackage().getPart(PackagingURIHelper.createPartName(knownPartURI));
    outputDoc.getPackage().removePart(copiedPart);
    return knownPart;
}
 
Example #8
Source File: RawCopier.java    From M2Doc with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Gets or creates the output {@link PackageRelationship} for the given input {@link PackageRelationship}.
 * 
 * <p>
 * An existing relationship of the output body's part is reused when it has the same type and target mode as
 * the input one and its target URI equals the URI mapped from the input target. Otherwise a relationship to
 * the output part is added, or an external relationship when there is no output part.
 * </p>
 * 
 * @param inputPartURIToOutputPartURI
 *            the mapping from input part {@link PackagePartName} to output part {@link PackagePartName}
 * @param outputBody
 *            the output {@link IBody}
 * @param outputPart
 *            the output {@link PackagePart}, <code>null</code> for an external target
 * @param inputRelationship
 *            the input {@link PackageRelationship}
 * @return the output {@link PackageRelationship}
 * @throws InvalidFormatException
 *             if image copy fails
 */
private PackageRelationship getOrCreateRelationship(Map<URI, URI> inputPartURIToOutputPartURI, IBody outputBody,
        PackagePart outputPart, PackageRelationship inputRelationship) throws InvalidFormatException {
    for (PackageRelationship candidate : outputBody.getPart().getPackagePart()
            .getRelationshipsByType(inputRelationship.getRelationshipType())) {
        final boolean sameMode = candidate.getTargetMode() == inputRelationship.getTargetMode();
        if (sameMode && inputPartURIToOutputPartURI.get(inputRelationship.getTargetURI())
                .equals(candidate.getTargetURI())) {
            return candidate;
        }
    }

    if (outputPart == null) {
        return outputBody.getPart().getPackagePart().addExternalRelationship(
                inputRelationship.getTargetURI().toString(), inputRelationship.getRelationshipType());
    }

    return outputBody.getPart().getPackagePart().addRelationship(outputPart.getPartName(),
            inputRelationship.getTargetMode(), inputRelationship.getRelationshipType());
}
 
Example #9
Source File: ImageParserSupport.java    From tephra with MIT License 5 votes vote down vote up
/**
 * Adds a picture to the slide, as a blip filled auto shape when a transparency is requested.
 *
 * <p>When {@code object} has no "alpha" entry, or its value is at least 1.0, the work is delegated
 * to {@code parseImage}. Otherwise a new image relation is added to the slide and an auto shape is
 * created whose blip fill references that relation with an alpha modulation of
 * {@code alpha * 100000}.
 *
 * @param xslfSlide       the slide receiving the picture
 * @param xslfPictureData the picture data whose package part is referenced
 * @param object          the JSON description; "alpha" is read here, the rest is passed to helpers
 */
void parse(XSLFSlide xslfSlide, XSLFPictureData xslfPictureData, JSONObject object) {
    final boolean hasAlpha = object.containsKey("alpha");
    final double alpha = hasAlpha ? object.getDoubleValue("alpha") : 1.0D;
    if (!hasAlpha || alpha >= 1.0D) {
        parseImage(xslfSlide, xslfPictureData, object);

        return;
    }

    final PackagePart picturePart = xslfPictureData.getPackagePart();
    final POIXMLDocumentPart.RelationPart imageRelation = xslfSlide.addRelation(null, XSLFRelation.IMAGES,
            new XSLFPictureData(picturePart));

    final XSLFAutoShape shape = xslfSlide.createAutoShape();
    final CTShape shapeXml = (CTShape) shape.getXmlObject();
    final CTBlipFillProperties blipFill = shapeXml.getSpPr().addNewBlipFill();

    final CTBlip blip = blipFill.addNewBlip();
    blip.setEmbed(imageRelation.getRelationship().getId());
    blip.setCstate(STBlipCompression.PRINT);
    blip.addNewAlphaModFix().setAmt(numeric.toInt(alpha * 100000));

    blipFill.addNewSrcRect();
    blipFill.addNewStretch().addNewFillRect();

    shape.setAnchor(parserHelper.getRectangle(object));
    parserHelper.rotate(shape, object);
}
 
Example #10
Source File: ReadOnlySharedStringsTable.java    From myexcel with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the table from the first shared strings part of the package, if there is one.
 *
 * <p>The part's input stream is closed once {@code readFrom} returns or fails.
 *
 * @param pkg          The {@link OPCPackage} to use as basis for the shared-strings table.
 * @param stringsCache the cache stored in this table before the part is read
 * @throws IOException  If reading the data from the package fails.
 * @throws SAXException if parsing the XML data fails.
 * @since POI 3.14-Beta3
 */
public ReadOnlySharedStringsTable(OPCPackage pkg, StringsCache stringsCache)
        throws IOException, SAXException {
    this.stringsCache = stringsCache;
    ArrayList<PackagePart> parts =
            pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());

    // Some workbooks have no shared strings table.
    if (!parts.isEmpty()) {
        PackagePart sstPart = parts.get(0);
        // Do not leave the part's stream open after parsing.
        try (java.io.InputStream sstStream = sstPart.getInputStream()) {
            readFrom(sstStream);
        }
    }
}
 
Example #11
Source File: OOXMLBleach.java    From DocBleach with MIT License 5 votes vote down vote up
/**
 * Returns an iterator over the parts of the package.
 *
 * @param pkg The package whose parts are listed
 * @return An iterator over {@code pkg.getParts()}
 * @throws BleachException wrapping the {@link InvalidFormatException} raised while listing parts
 */
private Iterator<PackagePart> getPartsIterator(OPCPackage pkg) throws BleachException {
  final Iterator<PackagePart> partsIterator;
  try {
    partsIterator = pkg.getParts().iterator();
  } catch (InvalidFormatException cause) {
    throw new BleachException(cause);
  }
  return partsIterator;
}
 
Example #12
Source File: OOXMLBleach.java    From DocBleach with MIT License 5 votes vote down vote up
/**
 * Sanitizes a whole package: its own relationships first, then every part.
 *
 * <p>Each part goes through the content type check, the externalData/DDE removal and, unless it is
 * a relationship part, the sanitizing of its relationships. Parts that are not deleted afterwards
 * get their content type remapped. A dummy file is pushed when the session holds at least one
 * threat.
 *
 * @param pkg The package to sanitize
 * @param session The bleach session threats are recorded in
 * @throws BleachException if the parts of the package cannot be listed
 * @throws InvalidFormatException propagated from the package and part operations
 */
public void sanitize(OPCPackage pkg, BleachSession session)
    throws BleachException, InvalidFormatException {
  LOGGER.trace("File opened");
  final Iterator<PackagePart> parts = getPartsIterator(pkg);

  pkg.ensureRelationships();

  sanitize(session, pkg, pkg.getRelationships());

  while (parts.hasNext()) {
    final PackagePart current = parts.next();
    sanitize(session, pkg, current);

    OOXMLTagHelper.removeExternalDataTagAndDDE(session, current);

    if (!current.isRelationshipPart()) {
      sanitize(session, current, current.getRelationships());
    }

    if (!current.isDeleted()) {
      remapContentType(session, current);
    }
  }

  // If threats have been removed, then add the dummy file so the relationship
  // still refers to an existing dummy object.
  if (session.threatCount() > 0) {
    pushDummyFile(pkg);
  }
}
 
Example #13
Source File: WordEmbedsTest.java    From sun-wordtable-read with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the content type of every embedded part and, for embedded {@code application/vnd.ms-excel}
 * workbooks, the string and numeric cells of the sheet named "Sheet1" (case-insensitive).
 * 
 * <p>
 * Numeric cells are also doubled in the in-memory workbook; the workbook is not written back.
 * </p>
 * 
 * @param doc
 *            the document whose embedded parts are listed
 * @throws Exception
 *             if an embedded part cannot be read
 */
private static void listEmbeds2(XWPFDocument doc) throws Exception {
	for (final PackagePart pPart : doc.getAllEmbedds()) {
		final String contentType = pPart.getContentType();
		System.out.println(contentType + "\n");
		// Constant first: a part without content type must not raise a NullPointerException.
		if (!"application/vnd.ms-excel".equals(contentType)) {
			continue;
		}

		final HSSFWorkbook embeddedWorkbook;
		// Make sure the part's stream is closed even when the workbook cannot be parsed.
		try (java.io.InputStream in = pPart.getInputStream()) {
			embeddedWorkbook = new HSSFWorkbook(in);
		}

		for (int sheet = 0; sheet < embeddedWorkbook.getNumberOfSheets(); sheet++) {
			final HSSFSheet activeSheet = embeddedWorkbook.getSheetAt(sheet);
			if (!activeSheet.getSheetName().equalsIgnoreCase("Sheet1")) {
				continue;
			}
			for (int rowIndex = activeSheet.getFirstRowNum(); rowIndex <= activeSheet
					.getLastRowNum(); rowIndex++) {
				final HSSFRow row = activeSheet.getRow(rowIndex);
				if (row == null) {
					// Rows between the first and last one may be undefined.
					continue;
				}
				// getLastCellNum() is the index of the last cell PLUS ONE, hence the strict bound.
				for (int cellIndex = row.getFirstCellNum(); cellIndex < row.getLastCellNum(); cellIndex++) {
					final HSSFCell cell = row.getCell(cellIndex);
					if (cell == null) {
						continue;
					}
					if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
						System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = "
								+ cell.getStringCellValue());
					}
					if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) {
						System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = "
								+ cell.getNumericCellValue());

						// update the value
						cell.setCellValue(cell.getNumericCellValue() * 2);
					}
				}
			}
		}
	}
}
 
Example #14
Source File: WordEmbedsTest.java    From sun-wordtable-read with Apache License 2.0 5 votes vote down vote up
/**
 * Prints one line per embedded part of the document: its part name and its content type, each
 * followed by a comma and a space.
 * 
 * @param doc
 *            the document whose embedded parts are listed
 * @throws OpenXML4JException
 *             propagated from {@code doc.getAllEmbedds()}
 */
private static void listEmbeds(XWPFDocument doc) throws OpenXML4JException {
	final List<PackagePart> embeddedParts = doc.getAllEmbedds();
	if (embeddedParts == null) {
		return;
	}
	for (PackagePart embedded : embeddedParts) {
		System.out.print(embedded.getPartName() + ", ");

		System.out.print(embedded.getContentType() + ", ");
		System.out.println();
	}
}
 
Example #15
Source File: WordTableCellContentOleObject.java    From sun-wordtable-read with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the OLE object referenced by a relationship ID.
 * 
 * <p>
 * Every relation of the document whose relationship ID equals {@code embedId} is opened as a POIFS file system
 * and read either as an Ole10Native object or as a document OLE object. A read failure is logged and leaves an
 * empty {@link WcOleObject} as the result for that relation.
 * </p>
 * 
 * @param embedId
 *            the relationship ID of the embedded object
 * @param xdoc
 *            the document whose relations are searched
 * @return the OLE object read from the last matching relation, or <code>null</code> when {@code embedId} is
 *         blank or no relation matches
 */
private WcOleObject readOleObject(String embedId, final XWPFDocument xdoc) {
	if (StringUtils.isBlank(embedId)) {
		return null;
	}

	WcOleObject result = null;
	for (POIXMLDocumentPart relation : xdoc.getRelations()) {
		if (!embedId.equals(relation.getPackageRelationship().getId())) {
			continue;
		}
		final PackagePart embeddedPart = relation.getPackagePart();

		result = new WcOleObject();
		// oleObjectContent.setFileName(packagePart.getPartName().getName());

		// Parse the file inside the OLE object, see http://poi.apache.org/poifs/how-to.html
		try (InputStream is = embeddedPart.getInputStream()) {
			final POIFSFileSystem poifs = new POIFSFileSystem(is);

			result = isOle10NativeObject(poifs.getRoot()) ? readOle10Native(poifs)
					: readDocumentOle(poifs, is);
		} catch (Exception e) {
			logger.error(e.getMessage(), e);
		}
	}

	return result;
}
 
Example #16
Source File: EncryptedCachedDiskStringsTable.java    From hadoopoffice with Apache License 2.0 5 votes vote down vote up
/**
 * Create a new encrypted cached string table
 * 
 * @param part             package part with Shared String Table
 * @param cacheSize        cache = -1 means all is in memory, cache = 0 means
 *                         nothing is in memory, positive means only that
 *                         fractions is kept in-memory
 * @param compressTempFile true, if temporary file storage for shared string
 *                         table should be gzip compressed, false if not
 * @param ca               cipher algorithm, leave it null for disabling
 *                         encryption (not recommended if source document is
 *                         encrypted)
 * @param cm               chaining mode, only need to be specified if cipher
 *                         algorithm is specified; ECB is replaced by CBC
 * @throws IOException     if the temporary file cannot be created or the part
 *                         cannot be read
 */

public EncryptedCachedDiskStringsTable(PackagePart part, int cacheSize, boolean compressTempFile,
		CipherAlgorithm ca, ChainingMode cm) throws IOException {
	this.cacheSize = cacheSize;
	this.count = 0;
	if (this.cacheSize > 0) {
		// based on recommendations of the Javadoc of HashMap
		this.cache = new LRUCache<>(((int) Math.ceil(this.cacheSize / 0.75)) + 1);
		this.stringPositionInFileList = new ArrayList<>(this.cacheSize);
	} else {
		this.cache = new LRUCache<>();
		this.stringPositionInFileList = new ArrayList<>();
	}
	// The list chosen above is kept: it is no longer overwritten by an unsized one.
	this.compressTempFile = compressTempFile;
	this.tempFile = TempFile.createTempFile("hadooffice-poi-temp-sst", ".tmp");
	this.tempFileSize = 0L;
	// generate random key for temporary files
	if (ca != null) {
		SecureRandom sr = new SecureRandom();
		byte[] iv = new byte[ca.blockSize];
		byte[] key = new byte[ca.defaultKeySize / 8];
		sr.nextBytes(iv);
		sr.nextBytes(key);
		SecretKeySpec skeySpec = new SecretKeySpec(key, ca.jceId);
		this.ca = ca;
		this.cm = cm;
		// ECB does not work with CryptoFunctions since it does not require an IV
		if (this.cm.jceId.equals(ChainingMode.ecb.jceId)) {
			this.cm = ChainingMode.cbc;
		}
		this.ciEncrypt = CryptoFunctions.getCipher(skeySpec, this.ca, this.cm, iv, Cipher.ENCRYPT_MODE, "PKCS5Padding");
		this.ciDecrypt = CryptoFunctions.getCipher(skeySpec, this.ca, this.cm, iv, Cipher.DECRYPT_MODE, "PKCS5Padding");
	}
	this.originalIS = part.getInputStream();
	this.readFrom(this.originalIS);
}
 
Example #17
Source File: SampleTemplateGenerator.java    From M2Doc with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Creates the sample template {@link XWPFDocument}. The returned {@link XWPFDocument} should be {@link XWPFDocument#close() closed} by the
 * caller.
 * 
 * <p>
 * The <code>/word/document.xml</code> part of the bundled sample template is read as UTF-8, the variable name and
 * feature name tags are replaced, and the result is written back as UTF-8. The feature name is the name of the
 * first {@link EAttribute} typed as EString, or of the first {@link EAttribute} when there is none.
 * </p>
 * 
 * @param variableName
 *            the variable name
 * @param eCls
 *            the variable {@link EClass}
 * @return the created sample template {@link XWPFDocument}
 * @throws IOException
 *             if the sample template can't be read
 * @throws InvalidFormatException
 *             if the sample template can't be read
 */
@SuppressWarnings("resource")
public XWPFDocument generate(String variableName, EClass eCls) throws InvalidFormatException, IOException {
    final OPCPackage pkg;
    // The package is loaded from the stream while it is opened, so the stream can be closed right after.
    try (InputStream is = SampleTemplateGenerator.class.getResourceAsStream("/resources/sampleTemplate.docx")) {
        pkg = OPCPackage.open(is);
    }

    String featureName = eCls.getEAllAttributes().get(0).getName();
    for (EAttribute attribute : eCls.getEAllAttributes()) {
        if (attribute.getEType() == EcorePackage.eINSTANCE.getEString()) {
            featureName = attribute.getName();
            break;
        }
    }

    final PackagePart part = pkg.getPart(PackagingURIHelper.createPartName("/word/document.xml"));
    // Gather all bytes before decoding: decoding each buffer on its own would break any multibyte
    // character that straddles two reads, and would use the platform charset instead of UTF-8.
    final java.io.ByteArrayOutputStream rawXml = new java.io.ByteArrayOutputStream();
    final byte[] buffer = new byte[BUFFER_SIZE];
    try (InputStream partIS = part.getInputStream()) {
        int nbBytes = partIS.read(buffer);
        while (nbBytes != -1) {
            rawXml.write(buffer, 0, nbBytes);
            nbBytes = partIS.read(buffer);
        }
    }
    String xml = rawXml.toString("UTF-8").replace(VARIABLE_NAME_TAG, variableName);
    xml = xml.replace(FEATURE_NAME_TAG, featureName);

    try (OutputStream partOS = part.getOutputStream()) {
        partOS.write(xml.getBytes("UTF-8"));
    }

    final XWPFDocument res = new XWPFDocument(pkg);

    final TemplateCustomProperties customProperties = new TemplateCustomProperties(res);
    customProperties.setM2DocVersion(M2DocUtils.VERSION);
    customProperties.getVariables().put(variableName, eCls.getEPackage().getName() + "::" + eCls.getName());
    final Set<String> packages = new LinkedHashSet<>();
    packages.add(eCls.getEPackage().getNsURI());
    for (EClass superCls : eCls.getEAllSuperTypes()) {
        packages.add(superCls.getEPackage().getNsURI());
    }
    customProperties.getPackagesURIs().addAll(packages);
    customProperties.save();

    return res;
}
 
Example #18
Source File: BufferedStringsTable.java    From excel-streaming-reader with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the shared strings table of a package from its first shared strings part.
 *
 * @param tmp the file passed on to the table constructor
 * @param cacheSize the cache size passed on to the table constructor
 * @param pkg the package searched for shared strings parts
 * @return the table, or {@code null} when the package has no shared strings part
 * @throws IOException propagated from the table constructor
 */
public static BufferedStringsTable getSharedStringsTable(File tmp, int cacheSize, OPCPackage pkg)
    throws IOException {
  List<PackagePart> sharedStringsParts =
      pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
  if (sharedStringsParts.size() == 0) {
    return null;
  }
  return new BufferedStringsTable(sharedStringsParts.get(0), tmp, cacheSize);
}
 
Example #19
Source File: BufferedStringsTable.java    From excel-streaming-reader with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the table backed by the given file and fills it from the given part.
 *
 * <p>The part's input stream is closed once {@code readFrom} returns or fails.
 * NOTE(review): this assumes {@code readFrom} consumes the stream before returning; confirm.
 *
 * @param part the shared strings part to read
 * @param file the file handed to the {@code FileBackedList}
 * @param cacheSize the cache size handed to the {@code FileBackedList}
 * @throws IOException if the part cannot be read
 */
private BufferedStringsTable(PackagePart part, File file, int cacheSize) throws IOException {
  this.list = new FileBackedList(file, cacheSize);
  try (java.io.InputStream partStream = part.getInputStream()) {
    readFrom(partStream);
  }
}
 
Example #20
Source File: OOXMLTagHelper.java    From DocBleach with MIT License 4 votes vote down vote up
/**
 * The externalData tag is embedded in xml files to automatically load OLE object or macro, or any
 * kind of potential threat. Remove this tag prevent MS Office from crashing. Actually, if you
 * only remove the relation it crashes, that's why you have to remove the relation and the
 * reference of the relation (via the externalData tag) in the xml file. Also removes DDE.
 *
 * <p>The part content is read as UTF-8 text by {@code readPartContent} and, when a tag was found,
 * the rewritten content is written back as UTF-8. Nothing is written and no threat is recorded
 * when the part is not a {@link ZipPackagePart} with the XML extension, when it cannot be read,
 * or when neither tag is present.
 *
 * @param session The current bleach session where the threat can be reported
 * @param part The package part to sanitize
 */
protected static void removeExternalDataTagAndDDE(BleachSession session, PackagePart part) {
  // Only applicable if the file is an XML file (not a _refs or whatever)
  // And is a ZipPackagePart, not a config file or whatever.
  if (!XML_EXTENSION.equals(part.getPartName().getExtension())
      || !(part instanceof ZipPackagePart)) {
    return;
  }

  String content = readPartContent(part);
  // An error occurred
  if (content == null) {
    return;
  }

  boolean external = content.indexOf(TAG_EXTERNAL_DATA) != -1;
  boolean ddeauto =
      content.indexOf(DDEAUTO) != -1 || content.indexOf(ATTRIBUTE_DDESERVICE_DATA) != -1;

  // The evil tag has not been found, return
  if (!external && !ddeauto) {
    return;
  }

  LOGGER.debug((external ? "externalData tag" : "DDE ") + " has been spotted {}", part);

  // Replace the tag by a comment
  content = content.replaceAll(REGEXP_EXTERNAL_DATA, XML_COMMENT_BLEACHED);

  // Replace DDEAUTO with nothing, DDE will not trigger
  content = content.replaceAll(DDEAUTO, "");

  // Replace ddeService & ddeTopic with cmd.exe exit
  content =
      content.replaceAll(
          REGEXP_DDESERVICE_DATA, ATTRIBUTE_DDESERVICE_DATA + "=\"" + DDE_DATA_BLEACHED1 + "\"");
  content =
      content.replaceAll(
          REGEXP_DDETOPIC_DATA, ATTRIBUTE_DDETOPIC_DATA + "=\"" + DDE_DATA_BLEACHED2 + "\"");

  // Write the result with the charset it was read with; the platform default could corrupt
  // non-ASCII characters. The stream is closed by the try-with-resources statement.
  try (OutputStream os = part.getOutputStream()) {
    os.write(content.getBytes(StandardCharsets.UTF_8));
  } catch (IOException ex) {
    LOGGER.error("Error while writing the part content. The file may be corrupted.", ex);
    return;
  }

  session.recordThreat(Threat.builder()
      .type(external ? ThreatType.EXTERNAL_CONTENT : ThreatType.ACTIVE_CONTENT)
      .severity(ThreatSeverity.HIGH)
      .action(ThreatAction.REMOVE)
      .location(part.getPartName().getName())
      .details(
          "Removed tag \" "
              + (external ? "externalData" : "DDEAUTO")
              + "\" from the document.")
      .build());
}
 
Example #21
Source File: XlsxSaxAnalyser.java    From easyexcel with Apache License 2.0 4 votes vote down vote up
/**
 * Opens the workbook package and collects what is needed to read its sheets.
 *
 * <p>When the package has a shared strings part, the read cache is chosen for it and the part is
 * analysed. The 1904 date setting and the styles table are then stored on the workbook holder, and
 * every sheet is registered by index with its name and input stream. Sheet comments are registered
 * too when the extra read set contains {@code CellExtraTypeEnum.COMMENT}.
 *
 * @param xlsxReadContext the read context, kept by this analyser
 * @param decryptedStream the stream handed to {@code readOpcPackage}
 * @throws Exception if the package cannot be read, or an {@link ExcelAnalysisException} when it
 *         has no sheet
 */
public XlsxSaxAnalyser(XlsxReadContext xlsxReadContext, InputStream decryptedStream) throws Exception {
    this.xlsxReadContext = xlsxReadContext;
    // Initialize cache
    XlsxReadWorkbookHolder workbookHolder = xlsxReadContext.xlsxReadWorkbookHolder();

    OPCPackage opcPackage = readOpcPackage(workbookHolder, decryptedStream);
    workbookHolder.setOpcPackage(opcPackage);

    ArrayList<PackagePart> sharedStringsParts =
        opcPackage.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());

    if (!CollectionUtils.isEmpty(sharedStringsParts)) {
        PackagePart sharedStringsPart = sharedStringsParts.get(0);

        // Specify default cache
        defaultReadCache(workbookHolder, sharedStringsPart);

        // Analysis sharedStringsTable.xml
        analysisSharedStringsTable(sharedStringsPart.getInputStream(), workbookHolder);
    }

    XSSFReader reader = new XSSFReader(opcPackage);
    analysisUse1904WindowDate(reader, workbookHolder);

    workbookHolder.setStylesTable(reader.getStylesTable());
    this.sheetList = new ArrayList<ReadSheet>();
    this.sheetMap = new HashMap<Integer, InputStream>();
    this.commentsTableMap = new HashMap<Integer, CommentsTable>();
    XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator)reader.getSheetsData();
    if (!sheets.hasNext()) {
        throw new ExcelAnalysisException("Can not find any sheet!");
    }
    for (int index = 0; sheets.hasNext(); index++) {
        InputStream sheetStream = sheets.next();
        this.sheetList.add(new ReadSheet(index, sheets.getSheetName()));
        this.sheetMap.put(index, sheetStream);
        if (!xlsxReadContext.readWorkbookHolder().getExtraReadSet().contains(CellExtraTypeEnum.COMMENT)) {
            continue;
        }
        CommentsTable comments = sheets.getSheetComments();
        if (null != comments) {
            this.commentsTableMap.put(index, comments);
        }
    }
}
 
Example #22
Source File: OOXMLThumbnailContentTransformer.java    From alfresco-repository with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Extracts the thumbnail stored in an OOXML file and writes it to the target.
 * <p>
 * The source content is copied to a temporary file and opened as an {@link OPCPackage}. The part
 * targeted by the first thumbnail relationship is streamed to the writer. The thumbnail stream and
 * the package are closed whether or not the copy succeeds.
 * 
 * @param reader    the source content
 * @param writer    the target the thumbnail is written to
 * @param options   the transformation options (not read by this method)
 * @throws UnimportantTransformException if the file has no thumbnail relationship
 * @throws AlfrescoRuntimeException if an {@link IOException} occurs during the transformation
 */
@Override
protected void transformInternal(ContentReader reader,
                                 ContentWriter writer,
                                 TransformationOptions options) throws Exception
{
    final String sourceMimetype = reader.getMimetype();
    final String sourceExtension = getMimetypeService().getExtension(sourceMimetype);
    final String targetMimetype = writer.getMimetype();
    
    
    if (log.isDebugEnabled())
    {
        StringBuilder msg = new StringBuilder();
        msg.append("Transforming from ").append(sourceMimetype)
           .append(" to ").append(targetMimetype);
        log.debug(msg.toString());
    }
    
    
    OPCPackage pkg = null;
    try 
    {
        File ooxmlTempFile = TempFileProvider.createTempFile(this.getClass().getSimpleName() + "_ooxml", sourceExtension);
        reader.getContent(ooxmlTempFile);
        
        // Load the file
        pkg = OPCPackage.open(ooxmlTempFile.getPath());
        
        // Does it have a thumbnail?
        PackageRelationshipCollection rels = 
            pkg.getRelationshipsByType(PackageRelationshipTypes.THUMBNAIL);
        if (rels.size() > 0)
        {
            // Get the thumbnail part
            PackageRelationship tRel = rels.getRelationship(0);
            PackagePart tPart = pkg.getPart(tRel);
            
            // Write it to the target, closing the stream even when the write fails
            InputStream tStream = tPart.getInputStream();
            try
            {
                writer.putContent( tStream );
            }
            finally
            {
                tStream.close();
            }
        }
        else
        {
            log.debug("No thumbnail present in " + reader.toString());
            throw new UnimportantTransformException(NO_THUMBNAIL_PRESENT_IN_FILE + targetMimetype);
        }
    } 
    catch (IOException e) 
    {
       throw new AlfrescoRuntimeException("Unable to transform " + sourceExtension + " file.", e);
    }
    finally
    {
        if (pkg != null)
        {
            pkg.close();
        }
    }
}
 
Example #23
Source File: EternalReadCacheSelector.java    From easyexcel with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the cache held by this selector, whatever the package part is.
 *
 * @param sharedStringsTablePackagePart ignored
 * @return the {@code readCache} of this selector
 */
@Override
public ReadCache readCache(PackagePart sharedStringsTablePackagePart) {
    return this.readCache;
}
 
Example #24
Source File: XlsxSaxAnalyser.java    From easyexcel with Apache License 2.0 4 votes vote down vote up
/**
 * Asks the holder's cache selector for a cache, stores it on the holder and initialises it with
 * the read context.
 *
 * @param xlsxReadWorkbookHolder the holder providing the selector and receiving the cache
 * @param sharedStringsTablePackagePart the package part passed to the selector
 */
private void defaultReadCache(XlsxReadWorkbookHolder xlsxReadWorkbookHolder,
    PackagePart sharedStringsTablePackagePart) {
    final ReadCache selectedCache =
        xlsxReadWorkbookHolder.getReadCacheSelector().readCache(sharedStringsTablePackagePart);
    xlsxReadWorkbookHolder.setReadCache(selectedCache);
    selectedCache.init(xlsxReadContext);
}
 
Example #25
Source File: RawCopier.java    From M2Doc with Eclipse Public License 1.0 3 votes vote down vote up
/**
 * Gets or copies the {@link PackagePart} corresponding to the given source {@link PackagePart}.
 * 
 * <p>
 * When the source part URI is already mapped, the mapped part is fetched from the output package; otherwise the
 * source is copied with {@code copyPart}. In both cases the mapping is (re)written for the source part URI.
 * </p>
 * 
 * @param inputPartURIToOutputPartURI
 *            the mapping from input part {@link PackagePartName} to output part {@link PackagePartName}
 * @param source
 *            the source {@link PackagePart}
 * @param outputDoc
 *            the output {@link XWPFDocument}
 * @return the corresponding {@link PackagePart} in output {@link XWPFDocument}
 * @throws IOException
 *             if a {@link PackagePart} can't be read
 * @throws NoSuchAlgorithmException
 *             if MD5 can't be found
 * @throws InvalidFormatException
 *             if {@link PackagePart} can't be accessed
 */
private PackagePart getOrCopyPart(Map<URI, URI> inputPartURIToOutputPartURI, PackagePart source,
        XWPFDocument outputDoc) throws InvalidFormatException, NoSuchAlgorithmException, IOException {
    final URI sourceURI = source.getPartName().getURI();
    final URI mappedURI = inputPartURIToOutputPartURI.get(sourceURI);

    final PackagePart outputPart = mappedURI == null ? copyPart(source, outputDoc)
            : outputDoc.getPackage().getPart(PackagingURIHelper.createPartName(mappedURI));
    inputPartURIToOutputPartURI.put(sourceURI, outputPart.getPartName().getURI());

    return outputPart;
}
 
Example #26
Source File: ReadCacheSelector.java    From easyexcel with Apache License 2.0 2 votes vote down vote up
/**
 * Select a cache.
 *
 * @param sharedStringsTablePackagePart the package part the selection is made for; presumably the
 *        workbook's shared strings table part, as the name suggests (to be confirmed against callers)
 * @return the selected {@link ReadCache}
 */
ReadCache readCache(PackagePart sharedStringsTablePackagePart);