Throw OfficeXmlFileException if HWPF reads an OOXML file

This commit is contained in:
PJ Fanning 2025-09-05 11:04:29 +01:00
parent 2fbfe2ea7e
commit 559485a0fe
3 changed files with 57 additions and 0 deletions

View File

@ -49,6 +49,7 @@ import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
@ -138,6 +139,9 @@ public abstract class HWPFDocumentCore extends POIDocument {
FileMagic fm = FileMagic.valueOf(is);
if (fm != FileMagic.OLE2) {
if (fm == FileMagic.OOXML) {
throw new OfficeXmlFileException("The document is really a OOXML file");
}
throw new IllegalArgumentException("The document is really a "+fm+" file");
}

View File

@ -34,6 +34,10 @@ public class HWPFTestDataSamples {
}
}
/**
 * Opens the named sample resource as a raw stream.
 *
 * <p>Simply delegates to {@code SAMPLES.openResourceAsStream}; the caller
 * receives the stream as returned and is presumably responsible for closing it
 * (TODO confirm against the {@code SAMPLES} implementation).
 *
 * @param sampleFileName name of the sample file to open
 * @return the stream handed back by {@code SAMPLES.openResourceAsStream}
 */
public static InputStream openSampleFileStream(String sampleFileName) {
    final InputStream sampleStream = SAMPLES.openResourceAsStream(sampleFileName);
    return sampleStream;
}
public static HWPFOldDocument openOldSampleFile(String sampleFileName) {
try {
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);

View File

@ -0,0 +1,49 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.junit.jupiter.api.Test;
import java.io.InputStream;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Tests for {@code HWPFParser.parse}: one binary Word sample that must parse,
 * and one OOXML sample that must be rejected.
 */
public class TestHWPFParser {

    /** Sample expected to parse successfully. */
    private static final String DOC_SAMPLE = "Lists.doc";

    /** Sample expected to be rejected by the parser. */
    private static final String DOCX_SAMPLE = "sample.docx";

    /** Number of entries expected in the paragraph table of {@link #DOC_SAMPLE}. */
    private static final int EXPECTED_PARAGRAPHS = 40;

    /**
     * Parsing the .doc sample yields a non-null document whose paragraph table
     * holds the expected number of entries.
     */
    @Test
    void testDoc() throws Exception {
        try (InputStream in = HWPFTestDataSamples.openSampleFileStream(DOC_SAMPLE);
             HWPFDocument parsed = HWPFParser.parse(in)) {
            assertNotNull(parsed);

            int paragraphCount = parsed.getParagraphTable().getParagraphs().size();
            assertEquals(EXPECTED_PARAGRAPHS, paragraphCount);
        }
    }

    /**
     * Parsing the .docx sample fails with a {@code HWPFReadException} whose
     * cause is an {@code OfficeXmlFileException}.
     */
    @Test
    void testFailOnDocx() throws Exception {
        try (InputStream in = HWPFTestDataSamples.openSampleFileStream(DOCX_SAMPLE)) {
            HWPFReadException thrown =
                    assertThrows(HWPFReadException.class, () -> HWPFParser.parse(in));

            Throwable cause = thrown.getCause();
            assertInstanceOf(OfficeXmlFileException.class, cause);
        }
    }
}