mirror of
https://github.com/apache/poi.git
synced 2026-02-27 20:40:08 +08:00
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1888021 13f79535-47bb-0310-9956-ffa450edef68
179 lines
7.8 KiB
Java
179 lines
7.8 KiB
Java
/* ====================================================================
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==================================================================== */
|
|
package org.apache.poi.stress;
|
|
|
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
|
import static org.junit.jupiter.api.Assumptions.assumeFalse;
|
|
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.util.HashSet;
|
|
import java.util.Set;
|
|
|
|
import org.apache.poi.EncryptedDocumentException;
|
|
import org.apache.poi.extractor.ExtractorFactory;
|
|
import org.apache.poi.extractor.POIOLE2TextExtractor;
|
|
import org.apache.poi.extractor.POITextExtractor;
|
|
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
|
|
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
|
|
import org.apache.poi.ooxml.POIXMLException;
|
|
import org.apache.poi.ss.extractor.ExcelExtractor;
|
|
import org.apache.poi.util.IOUtils;
|
|
|
|
/**
|
|
* Base class with things that can be run for any supported file handler
|
|
* in the integration tests, mostly text-extraction related at the moment.
|
|
*/
|
|
public abstract class AbstractFileHandler implements FileHandler {
|
|
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>();
|
|
static {
|
|
// password protected files without password
|
|
// ... currently none ...
|
|
|
|
// unsupported file-types, no supported OLE2 parts
|
|
EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
|
|
EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat");
|
|
EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat");
|
|
EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat");
|
|
EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug63955-winmail.dat");
|
|
EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm");
|
|
EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2");
|
|
EXPECTED_EXTRACTOR_FAILURES.add("poifs/64322.ole2");
|
|
}
|
|
|
|
@Override
|
|
public void handleExtracting(File file) throws Exception {
|
|
boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
|
|
try {
|
|
ExtractorFactory.setThreadPrefersEventExtractors(true);
|
|
handleExtractingInternal(file);
|
|
|
|
ExtractorFactory.setThreadPrefersEventExtractors(false);
|
|
handleExtractingInternal(file);
|
|
} finally {
|
|
ExtractorFactory.setThreadPrefersEventExtractors(before);
|
|
}
|
|
|
|
/* Did fail for some documents with special XML contents...
|
|
try {
|
|
OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
|
|
"/tmp/pretty-" + file.getName() });
|
|
} catch (ZipException e) {
|
|
// ignore, not a Zip/OOXML file
|
|
}*/
|
|
}
|
|
|
|
private void handleExtractingInternal(File file) throws Exception {
|
|
long length = file.length();
|
|
long modified = file.lastModified();
|
|
|
|
POITextExtractor extractor = null;
|
|
String fileAndParentName = file.getParentFile().getName() + "/" + file.getName();
|
|
try {
|
|
// fix windows absolute paths for exception message tracking
|
|
String relPath = file.getPath().replaceAll(".*test-data", "test-data").replace('\\', '/');
|
|
extractor = ExtractorFactory.createExtractor(file);
|
|
assertNotNull(extractor, "Should get a POITextExtractor but had none for file " + relPath);
|
|
|
|
assertNotNull(extractor.getText(), "Should get some text but had none for file " + relPath);
|
|
|
|
// also try metadata
|
|
@SuppressWarnings("resource")
|
|
POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor();
|
|
assertNotNull(metadataExtractor.getText());
|
|
|
|
assertFalse(EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName),
|
|
"Expected Extraction to fail for file " + relPath + " and handler " + this + ", but did not fail!");
|
|
|
|
assertEquals(length, file.length(), "File should not be modified by extractor");
|
|
assertEquals(modified, file.lastModified(), "File should not be modified by extractor");
|
|
|
|
handleExtractingAsStream(file);
|
|
|
|
if (extractor instanceof POIOLE2TextExtractor) {
|
|
try (HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor) extractor)) {
|
|
assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());
|
|
assertNotNull(hpsfExtractor.getSummaryInformationText());
|
|
String text = hpsfExtractor.getText();
|
|
//System.out.println(text);
|
|
assertNotNull(text);
|
|
}
|
|
}
|
|
|
|
// test again with including formulas and cell-comments as this caused some bugs
|
|
if (extractor instanceof ExcelExtractor &&
|
|
// comment-extraction and formula extraction are not well supported in event based extraction
|
|
!(extractor instanceof EventBasedExcelExtractor)) {
|
|
((ExcelExtractor) extractor).setFormulasNotResults(true);
|
|
|
|
String text = extractor.getText();
|
|
assertNotNull(text);
|
|
// */
|
|
|
|
((ExcelExtractor) extractor).setIncludeCellComments(true);
|
|
|
|
text = extractor.getText();
|
|
assertNotNull(text);
|
|
}
|
|
} catch (IOException | POIXMLException e) {
|
|
Exception prevE = e;
|
|
Throwable cause;
|
|
while ((cause = prevE.getCause()) instanceof Exception) {
|
|
if (cause instanceof IOException || cause instanceof POIXMLException) {
|
|
prevE = (Exception)cause;
|
|
} else {
|
|
throw (Exception)cause;
|
|
}
|
|
}
|
|
throw e;
|
|
} catch (IllegalArgumentException e) {
|
|
if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) {
|
|
throw e;
|
|
}
|
|
} catch (EncryptedDocumentException e) {
|
|
String msg = "org.apache.poi.EncryptedDocumentException: Export Restrictions in place - please install JCE Unlimited Strength Jurisdiction Policy files";
|
|
assumeFalse(msg.equals(e.getMessage()));
|
|
throw e;
|
|
} catch (IllegalStateException e) {
|
|
if (!e.getMessage().contains("POI Scratchpad jar missing") || !Boolean.getBoolean("scratchpad.ignore")) {
|
|
throw e;
|
|
}
|
|
} finally {
|
|
IOUtils.closeQuietly(extractor);
|
|
}
|
|
}
|
|
|
|
private void handleExtractingAsStream(File file) throws IOException {
|
|
try (InputStream stream = new FileInputStream(file)) {
|
|
try (POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream)) {
|
|
assertNotNull(streamExtractor);
|
|
|
|
assertNotNull(streamExtractor.getText());
|
|
}
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void handleAdditional(File file) throws Exception {
|
|
// by default we do nothing here
|
|
}
|
|
}
|