Prevent more cases of unbounded allocation

Test WordToTextConverter with all sample files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1906326 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2023-01-01 15:59:44 +00:00
parent ab45ef779c
commit 059283c9e6
2 changed files with 68 additions and 15 deletions

View File

@ -20,56 +20,56 @@ import java.util.Arrays;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.RecordFormatException;
import static java.lang.Integer.toHexString;
import static org.apache.logging.log4j.util.Unbox.box;
public class NilPICFAndBinData
{
public class NilPICFAndBinData {
private static final Logger LOGGER = LogManager.getLogger(NilPICFAndBinData.class);
// limit the default maximum length of the allocated fields
private static final int MAX_SIZE = 100_000;
private byte[] _binData;
/**
 * Creates an instance by parsing the structure found in {@code data},
 * starting at {@code offset}. All parsing is delegated to
 * {@link #fillFields(byte[], int)}.
 *
 * @param data   buffer that contains the structure
 * @param offset index in {@code data} at which the structure begins
 */
public NilPICFAndBinData( byte[] data, int offset ) {
    this.fillFields( data, offset );
}
public void fillFields( byte[] data, int offset )
{
public void fillFields( byte[] data, int offset ) {
int lcb = LittleEndian.getInt( data, offset );
int cbHeader = LittleEndian.getUShort( data, offset
+ LittleEndianConsts.INT_SIZE );
if ( cbHeader != 0x44 )
{
if ( cbHeader != 0x44 ) {
LOGGER.atWarn().log("NilPICFAndBinData at offset {} cbHeader 0x{} != 0x44", box(offset), toHexString(cbHeader));
}
// make sure these do not cause OOM if passed as invalid or extremely large values
IOUtils.safelyAllocateCheck(lcb, MAX_SIZE);
IOUtils.safelyAllocateCheck(cbHeader, MAX_SIZE);
// skip the 62 ignored bytes
int binaryLength = lcb - cbHeader;
this._binData = Arrays.copyOfRange(data, offset + cbHeader,
offset + cbHeader + binaryLength);
}
public byte[] getBinData()
{
public byte[] getBinData() {
return _binData;
}
public byte[] serialize()
{
public byte[] serialize() {
byte[] bs = new byte[_binData.length + 0x44];
LittleEndian.putInt( bs, 0, _binData.length + 0x44 );
System.arraycopy( _binData, 0, bs, 0x44, _binData.length );
return bs;
}
public int serialize( byte[] data, int offset )
{
public int serialize( byte[] data, int offset ) {
LittleEndian.putInt( data, offset, _binData.length + 0x44 );
System.arraycopy( _binData, 0, data, offset + 0x44, _binData.length );
return 0x44 + _binData.length;

View File

@ -20,10 +20,29 @@ import static org.apache.poi.hwpf.HWPFTestDataSamples.openSampleFile;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.InputStream;
import java.util.Arrays;
import java.util.stream.Stream;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.util.RecordFormatException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
public class TestWordToTextConverter {
private static final Logger LOG = LogManager.getLogger(WordToTextConverter.class);
/**
* [FAILING] Bug 47731 - Word Extractor considers text copied from some
@ -60,4 +79,38 @@ public class TestWordToTextConverter {
assertNotNull(WordToTextConverter.getText(doc));
}
}
/**
 * Converts one sample file to text and checks that a non-null result is
 * produced. Files whose detected magic is not OLE2 are skipped, and the
 * exception types listed in the catch clause are deliberately not treated
 * as test failures.
 *
 * @param file the sample file to process, supplied by {@link #files()}
 * @throws Exception for any failure other than the ignored exception types
 */
@ParameterizedTest
@MethodSource("files")
void testAllFiles(File file) throws Exception {
    LOG.info("Testing " + file);

    try (FileInputStream rawInput = new FileInputStream(file)) {
        final InputStream input = FileMagic.prepareToCheckMagic(rawInput);
        final FileMagic detected = FileMagic.valueOf(input);

        if (detected == FileMagic.OLE2) {
            try (HWPFDocument doc = new HWPFDocument(input)) {
                assertNotNull(WordToTextConverter.getText(doc));
            } catch (OldWordFileFormatException | EncryptedDocumentException | RecordFormatException ignored) {
                // ignored here
            }
        } else {
            LOG.info("Skip non-doc file " + file);
        }
    }
}
/**
 * Supplies the arguments for the parameterized test: one entry per file in
 * the "document" sub-directory of the test-data directory that passes a
 * {@code SuffixFileFilter} for ".doc".
 * <p>
 * The test-data directory is taken from the system property named by
 * {@code POIDataSamples.TEST_PROPERTY}; if that property is not set,
 * "test-data" is used when it exists and "../test-data" otherwise.
 *
 * @return a stream with one {@link Arguments} instance per matching file
 */
public static Stream<Arguments> files() {
    final String fallbackDir = new File("test-data").exists() ? "test-data" : "../test-data";
    final String dataDirName = System.getProperty(POIDataSamples.TEST_PROPERTY, fallbackDir);

    final FilenameFilter docFilter = new SuffixFileFilter(".doc");
    final File documentDir = new File(dataDirName, "document");
    final File[] documents = documentDir.listFiles(docFilter);

    // listFiles() yields null if the directory cannot be listed
    assertNotNull(documents);
    return Arrays.stream(documents).map(document -> Arguments.of(document));
}
}