mirror of
https://github.com/apache/poi.git
synced 2026-02-27 20:40:08 +08:00
Prevent more cases of unbounded allocation
Test WordToTextConverter with all sample files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1906326 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ab45ef779c
commit
059283c9e6
@ -20,56 +20,56 @@ import java.util.Arrays;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.LittleEndianConsts;
|
||||
|
||||
import static java.lang.Integer.toHexString;
|
||||
import static org.apache.logging.log4j.util.Unbox.box;
|
||||
|
||||
public class NilPICFAndBinData
|
||||
{
|
||||
|
||||
public class NilPICFAndBinData {
|
||||
private static final Logger LOGGER = LogManager.getLogger(NilPICFAndBinData.class);
|
||||
|
||||
// limit the default maximum length of the allocated fields
|
||||
private static final int MAX_SIZE = 100_000;
|
||||
|
||||
private byte[] _binData;
|
||||
|
||||
/**
 * Creates an instance by immediately parsing the structure found in
 * {@code data} at {@code offset}.
 *
 * @param data   buffer containing the serialized structure
 * @param offset index in {@code data} at which the structure begins
 */
public NilPICFAndBinData( byte[] data, int offset ) {
    // all parsing lives in fillFields; the constructor only forwards to it
    this.fillFields( data, offset );
}
|
||||
|
||||
public void fillFields( byte[] data, int offset )
|
||||
{
|
||||
public void fillFields( byte[] data, int offset ) {
|
||||
int lcb = LittleEndian.getInt( data, offset );
|
||||
int cbHeader = LittleEndian.getUShort( data, offset
|
||||
+ LittleEndianConsts.INT_SIZE );
|
||||
|
||||
if ( cbHeader != 0x44 )
|
||||
{
|
||||
if ( cbHeader != 0x44 ) {
|
||||
LOGGER.atWarn().log("NilPICFAndBinData at offset {} cbHeader 0x{} != 0x44", box(offset), toHexString(cbHeader));
|
||||
}
|
||||
|
||||
// make sure these do not cause OOM if passed as invalid or extremely large values
|
||||
IOUtils.safelyAllocateCheck(lcb, MAX_SIZE);
|
||||
IOUtils.safelyAllocateCheck(cbHeader, MAX_SIZE);
|
||||
|
||||
// skip the 62 ignored bytes
|
||||
int binaryLength = lcb - cbHeader;
|
||||
this._binData = Arrays.copyOfRange(data, offset + cbHeader,
|
||||
offset + cbHeader + binaryLength);
|
||||
}
|
||||
|
||||
public byte[] getBinData()
|
||||
{
|
||||
public byte[] getBinData() {
|
||||
return _binData;
|
||||
}
|
||||
|
||||
public byte[] serialize()
|
||||
{
|
||||
public byte[] serialize() {
|
||||
byte[] bs = new byte[_binData.length + 0x44];
|
||||
LittleEndian.putInt( bs, 0, _binData.length + 0x44 );
|
||||
System.arraycopy( _binData, 0, bs, 0x44, _binData.length );
|
||||
return bs;
|
||||
}
|
||||
|
||||
public int serialize( byte[] data, int offset )
|
||||
{
|
||||
public int serialize( byte[] data, int offset ) {
|
||||
LittleEndian.putInt( data, offset, _binData.length + 0x44 );
|
||||
System.arraycopy( _binData, 0, data, offset + 0x44, _binData.length );
|
||||
return 0x44 + _binData.length;
|
||||
|
||||
@ -20,10 +20,29 @@ import static org.apache.poi.hwpf.HWPFTestDataSamples.openSampleFile;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FilenameFilter;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.io.filefilter.SuffixFileFilter;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.util.RecordFormatException;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
|
||||
public class TestWordToTextConverter {
|
||||
// use the test class as logger category so output is attributed to this test, not to the converter
private static final Logger LOG = LogManager.getLogger(TestWordToTextConverter.class);
|
||||
|
||||
/**
|
||||
* [FAILING] Bug 47731 - Word Extractor considers text copied from some
|
||||
@ -60,4 +79,38 @@ public class TestWordToTextConverter {
|
||||
assertNotNull(WordToTextConverter.getText(doc));
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("files")
|
||||
void testAllFiles(File file) throws Exception {
|
||||
LOG.info("Testing " + file);
|
||||
try (FileInputStream stream = new FileInputStream(file)) {
|
||||
InputStream is = FileMagic.prepareToCheckMagic(stream);
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
|
||||
if (fm != FileMagic.OLE2) {
|
||||
LOG.info("Skip non-doc file " + file);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
try (HWPFDocument doc = new HWPFDocument(is)) {
|
||||
String foundText = WordToTextConverter.getText(doc);
|
||||
assertNotNull(foundText);
|
||||
} catch (OldWordFileFormatException | EncryptedDocumentException | RecordFormatException e) {
|
||||
// ignored here
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static Stream<Arguments> files() {
|
||||
String dataDirName = System.getProperty(POIDataSamples.TEST_PROPERTY,
|
||||
new File("test-data").exists() ? "test-data" : "../test-data");
|
||||
|
||||
File[] documents = new File(dataDirName, "document").listFiles(
|
||||
(FilenameFilter) new SuffixFileFilter(".doc"));
|
||||
assertNotNull(documents);
|
||||
|
||||
return Arrays.stream(documents).map(Arguments::of);
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user