support reading HSSFWorkbook with password passed as param (#1016)

* support reading HSSFWorkbook with password passed as param

Update HSSFWorkbook.java

* npe issue

* remove one use of Biff8EncryptionKey

* extractor api

* support changing passwords
This commit is contained in:
PJ Fanning 2026-02-20 15:06:33 +01:00 committed by GitHub
parent 6ba4a1b050
commit 811eb4a4a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 267 additions and 64 deletions

View File

@ -72,10 +72,24 @@ public class ExcelExtractor implements POIOLE2TextExtractor, org.apache.poi.ss.e
this(fs.getRoot()); this(fs.getRoot());
} }
/**
* Creates an extractor for the given filesystem by delegating to
* {@link #ExcelExtractor(DirectoryNode, char[])} with the filesystem's root directory.
*
* @param fs the filesystem whose root directory is read
* @param password the password in char array format, passed on unchanged
* @throws IOException declared by the delegated constructor
* @since 6.0.0
*/
public ExcelExtractor(POIFSFileSystem fs, char[] password) throws IOException {
this(fs.getRoot(), password);
}
public ExcelExtractor(DirectoryNode dir) throws IOException { public ExcelExtractor(DirectoryNode dir) throws IOException {
this(new HSSFWorkbook(dir, true)); this(new HSSFWorkbook(dir, true));
} }
/**
* Creates an extractor over a new {@link HSSFWorkbook} built from the given directory,
* with the workbook's {@code preserveNodes} argument set to {@code true}.
*
* @param dir the directory passed to the {@link HSSFWorkbook} constructor
* @param password the password in char array format, passed to the
* {@link HSSFWorkbook} constructor unchanged
* @throws IOException declared by the {@link HSSFWorkbook} constructor
* @since 6.0.0
*/
public ExcelExtractor(DirectoryNode dir, char[] password) throws IOException {
this(new HSSFWorkbook(dir, true, password));
}
private static final class CommandParseException extends Exception { private static final class CommandParseException extends Exception {
public CommandParseException(String msg) { public CommandParseException(String msg) {
super(msg); super(msg);

View File

@ -172,19 +172,31 @@ public final class RecordFactory {
* Create an array of records from an input stream * Create an array of records from an input stream
* *
* @param in the InputStream from which the records will be obtained * @param in the InputStream from which the records will be obtained
*
* @return a list of Records created from the InputStream * @return a list of Records created from the InputStream
*
* @throws org.apache.poi.util.RecordFormatException on error processing the InputStream * @throws org.apache.poi.util.RecordFormatException on error processing the InputStream
*/ */
public static List<org.apache.poi.hssf.record.Record> createRecords(InputStream in) throws RecordFormatException { public static List<org.apache.poi.hssf.record.Record> createRecords(InputStream in) throws RecordFormatException {
return createRecords(in, null);
}
/**
* Create a list of records from an input stream
*
* @param in the InputStream from which the records will be obtained
* @param password in char array format (can be null)
* @return a list of Records created from the InputStream
* @throws org.apache.poi.util.RecordFormatException on error processing the InputStream
* @since 6.0.0
*/
public static List<org.apache.poi.hssf.record.Record> createRecords(
InputStream in, char[] password) throws RecordFormatException {
List<org.apache.poi.hssf.record.Record> records = new ArrayList<>(NUM_RECORDS); List<org.apache.poi.hssf.record.Record> records = new ArrayList<>(NUM_RECORDS);
RecordFactoryInputStream recStream = new RecordFactoryInputStream(in, true); RecordFactoryInputStream recStream = new RecordFactoryInputStream(in, true, password);
Record record; Record record;
while ((record = recStream.nextRecord())!=null) { while ((record = recStream.nextRecord()) != null) {
records.add(record); records.add(record);
IOUtils.safelyAllocateCheck(records.size(), MAX_NUMBER_OF_RECORDS); IOUtils.safelyAllocateCheck(records.size(), MAX_NUMBER_OF_RECORDS);
@ -192,4 +204,5 @@ public final class RecordFactory {
return records; return records;
} }
} }

View File

@ -102,19 +102,39 @@ public final class RecordFactoryInputStream {
_lastRecord = rec; _lastRecord = rec;
} }
/**
* This method requires that you store the password in {@link Biff8EncryptionKey}.
* Since 6.0.0, we have overloaded methods where you can pass the password as a param instead.
*/
@SuppressWarnings({"squid:S2068"}) @SuppressWarnings({"squid:S2068"})
public RecordInputStream createDecryptingStream(InputStream original) { public RecordInputStream createDecryptingStream(InputStream original) {
String userPassword = Biff8EncryptionKey.getCurrentUserPassword(); return createDecryptingStream(original, (String) null);
if (userPassword == null) { }
userPassword = Decryptor.DEFAULT_PASSWORD;
}
/**
* Char array flavour of {@code createDecryptingStream}: the password is converted to a
* {@link String} (a {@code null} array stays {@code null}) and handed to the String
* based overload.
*
* @param original the stream passed on to the String based overload
* @param password the password in char array format (can be null)
* @return whatever the String based overload returns
* @since 6.0.0
*/
public RecordInputStream createDecryptingStream(InputStream original, char[] password) {
    if (password == null) {
        // the cast keeps overload resolution on the String variant
        return createDecryptingStream(original, (String) null);
    }
    return createDecryptingStream(original, new String(password));
}
/**
* @since 6.0.0
*/
public RecordInputStream createDecryptingStream(InputStream original, String password) {
if (password == null) {
password = Biff8EncryptionKey.getCurrentUserPassword();
if (password == null) {
password = Decryptor.DEFAULT_PASSWORD;
}
}
EncryptionInfo info = _filePassRec.getEncryptionInfo(); EncryptionInfo info = _filePassRec.getEncryptionInfo();
try { try {
if (!info.getDecryptor().verifyPassword(userPassword)) { if (!info.getDecryptor().verifyPassword(password)) {
throw new EncryptedDocumentException( throw new EncryptedDocumentException(
(Decryptor.DEFAULT_PASSWORD.equals(userPassword) ? "Default" : "Supplied") (Decryptor.DEFAULT_PASSWORD.equals(password) ? "Default" : "Supplied")
+ " password is invalid for salt/verifier/verifierHash"); + " password is invalid for salt/verifier/verifierHash");
} }
} catch (GeneralSecurityException e) { } catch (GeneralSecurityException e) {
throw new EncryptedDocumentException(e); throw new EncryptedDocumentException(e);
@ -176,17 +196,29 @@ public final class RecordFactoryInputStream {
/** /**
* @param in the InputStream to read from * @param in the InputStream to read from
*
* @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose * @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
* {@link ContinueRecord}s should be skipped (this is sometimes useful in event based * {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
* processing). * processing).
*/ */
public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRecords) { public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRecords) {
this(in, shouldIncludeContinueRecords, null);
}
/**
* @param in the InputStream to read from
* @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
* {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
* processing).
* @param password password in char array format (can be null)
* @since 6.0.0
*/
public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRecords,
char[] password) {
RecordInputStream rs = new RecordInputStream(in); RecordInputStream rs = new RecordInputStream(in);
List<org.apache.poi.hssf.record.Record> records = new ArrayList<>(); List<org.apache.poi.hssf.record.Record> records = new ArrayList<>();
StreamEncryptionInfo sei = new StreamEncryptionInfo(rs, records); StreamEncryptionInfo sei = new StreamEncryptionInfo(rs, records);
if (sei.hasEncryption()) { if (sei.hasEncryption()) {
rs = sei.createDecryptingStream(in); rs = sei.createDecryptingStream(in, password);
} else { } else {
// typical case - non-encrypted stream // typical case - non-encrypted stream
} }
@ -201,22 +233,22 @@ public final class RecordFactoryInputStream {
_lastRecord = sei.getLastRecord(); _lastRecord = sei.getLastRecord();
/* /*
* How to recognise end of stream? * How to recognise end of stream?
* In the best case, the underlying input stream (in) ends just after the last EOF record * In the best case, the underlying input stream (in) ends just after the last EOF record
* Usually however, the stream is padded with an arbitrary byte count. Excel and most apps * Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
* reliably use zeros for padding and if this were always the case, this code could just * reliably use zeros for padding and if this were always the case, this code could just
* skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with * skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
* non-zero padding that is read OK by Excel (Excel also fixes the padding). * non-zero padding that is read OK by Excel (Excel also fixes the padding).
* *
* So to properly detect the workbook end of stream, this code has to identify the last * So to properly detect the workbook end of stream, this code has to identify the last
* EOF record. This is not so easy because the worbook bof+eof pair do not bracket the * EOF record. This is not so easy because the workbook bof+eof pair do not bracket the
* whole stream. The worksheets follow the workbook, but it is not easy to tell how many * whole stream. The worksheets follow the workbook, but it is not easy to tell how many
* sheet sub-streams should be present. Hence we are looking for an EOF record that is not * sheet sub-streams should be present. Hence we are looking for an EOF record that is not
* immediately followed by a BOF record. One extra complication is that bof+eof sub- * immediately followed by a BOF record. One extra complication is that bof+eof sub-
* streams can be nested within worksheet streams and it's not clear in these cases what * streams can be nested within worksheet streams and it's not clear in these cases what
* record might follow any EOF record. So we also need to keep track of the bof/eof * record might follow any EOF record. So we also need to keep track of the bof/eof
* nesting level. * nesting level.
*/ */
_bofDepth = sei.hasBOFRecord() ? 1 : 0; _bofDepth = sei.hasBOFRecord() ? 1 : 0;
_lastRecordWasEOFLevelZero = false; _lastRecordWasEOFLevelZero = false;
} }

View File

@ -205,6 +205,12 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
*/ */
private final UDFFinder _udfFinder = new IndexedUDFFinder(AggregatingUDFFinder.DEFAULT); private final UDFFinder _udfFinder = new IndexedUDFFinder(AggregatingUDFFinder.DEFAULT);
/**
* The password used to password protect (encrypt) this workbook when writing out;
* assigned by {@link #setOutputPassword(char[])}. When {@code null}, the password
* held by {@link Biff8EncryptionKey} (if any) is used instead.
* @since 6.0.0
*/
private char[] outputPasswordChars;
public static HSSFWorkbook create(InternalWorkbook book) { public static HSSFWorkbook create(InternalWorkbook book) {
return new HSSFWorkbook(book); return new HSSFWorkbook(book);
} }
@ -287,6 +293,27 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
this(fs.getRoot(), fs, preserveNodes); this(fs.getRoot(), fs, preserveNodes);
} }
/**
* Given a POI POIFSFileSystem object, read in its Workbook and populate
* the high and low level models. If you're reading in a workbook... start here!
*
* @param fs the POI filesystem that contains the Workbook stream.
* @param preserveNodes whether to preserve other nodes, such as
* macros. This takes more memory, so only say yes if you
* need to. If set, will store all of the POIFSFileSystem
* in memory
* @param password the password in char array format (can be null); it is passed
* unchanged, together with the root directory of {@code fs}, to
* {@link #HSSFWorkbook(DirectoryNode, boolean, char[])}
* @throws IOException if the stream cannot be read
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
* input format
* @since 6.0.0
* @see POIFSFileSystem
*/
public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes, char[] password)
throws IOException {
this(fs.getRoot(), preserveNodes, password);
}
public static String getWorkbookDirEntryName(DirectoryNode directory) { public static String getWorkbookDirEntryName(DirectoryNode directory) {
if (directory.hasEntryCaseInsensitive(WORKBOOK)) { if (directory.hasEntryCaseInsensitive(WORKBOOK)) {
return WORKBOOK; return WORKBOOK;
@ -359,6 +386,28 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
*/ */
public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes) public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes)
throws IOException { throws IOException {
this(directory, preserveNodes, null);
}
/**
* given a POI POIFSFileSystem object, and a specific directory
* within it, read in its Workbook and populate the high and
* low level models. If you're reading in a workbook...start here.
*
* @param directory the POI filesystem directory to process from
* @param preserveNodes whether to preserve other nodes, such as
* macros. This takes more memory, so only say yes if you
* need to. If set, will store all of the POIFSFileSystem
* in memory
* @param password in char array format (can be null)
* @throws IOException if the stream cannot be read
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
* input format
* @see POIFSFileSystem
* @since 6.0.0
*/
public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes, char[] password)
throws IOException {
super(directory); super(directory);
String workbookName = getWorkbookDirEntryName(directory); String workbookName = getWorkbookDirEntryName(directory);
@ -377,7 +426,7 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
// it happens to be spelled. // it happens to be spelled.
InputStream stream = directory.createDocumentInputStream(workbookName); InputStream stream = directory.createDocumentInputStream(workbookName);
List<org.apache.poi.hssf.record.Record> records = RecordFactory.createRecords(stream); List<org.apache.poi.hssf.record.Record> records = RecordFactory.createRecords(stream, password);
workbook = InternalWorkbook.createWorkbook(records); workbook = InternalWorkbook.createWorkbook(records);
setPropertiesFromWorkbook(workbook); setPropertiesFromWorkbook(workbook);
@ -420,6 +469,24 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
this(s, true); this(s, true);
} }
/**
* Companion to HSSFWorkbook(POIFSFileSystem), this constructs the
* POI filesystem around your {@link InputStream}, including all nodes.
* <p>This calls {@link #HSSFWorkbook(InputStream, boolean, char[])} with
* preserve nodes set to true.
*
* @param s the {@link InputStream} to read the workbook from
* @param password the password in char array format, passed on unchanged
* @throws IOException if the stream cannot be read
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
* input format
* @see #HSSFWorkbook(InputStream, boolean, char[])
* @see #HSSFWorkbook(POIFSFileSystem)
* @see POIFSFileSystem
* @since 6.0.0
*/
public HSSFWorkbook(InputStream s, char[] password) throws IOException {
this(s, true, password);
}
/** /**
* Companion to HSSFWorkbook(POIFSFileSystem), this constructs the * Companion to HSSFWorkbook(POIFSFileSystem), this constructs the
* POI filesystem around your {@link InputStream}. * POI filesystem around your {@link InputStream}.
@ -440,10 +507,31 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
this(new POIFSFileSystem(s).getRoot(), preserveNodes); this(new POIFSFileSystem(s).getRoot(), preserveNodes);
} }
/**
* Companion to HSSFWorkbook(POIFSFileSystem), this constructs the
* POI filesystem around your {@link InputStream}.
*
* @param s the {@link InputStream} that the POI filesystem is constructed from
* @param preserveNodes whether to preserve other nodes, such as
* macros. This takes more memory, so only say yes if you
* need to.
* @param password in char array format (can be null)
* @throws IOException if the stream cannot be read
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
* input format
* @see POIFSFileSystem
* @see #HSSFWorkbook(POIFSFileSystem)
* @since 6.0.0
*/
@SuppressWarnings("resource") // POIFSFileSystem always closes the stream
public HSSFWorkbook(InputStream s, boolean preserveNodes, char[] password)
throws IOException {
this(new POIFSFileSystem(s).getRoot(), preserveNodes, password);
}
/** /**
* used internally to set the workbook properties. * used internally to set the workbook properties.
*/ */
private void setPropertiesFromWorkbook(InternalWorkbook book) { private void setPropertiesFromWorkbook(InternalWorkbook book) {
this.workbook = book; this.workbook = book;
@ -1482,6 +1570,15 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
} }
} }
/**
* Set the password to be used to password protect the spreadsheet when we output the data.
* <p>The array is copied, so the caller may clear or reuse its own array afterwards
* without changing the password applied by a later write.
*
* @param password as a char array (null is supported and means use {@link Biff8EncryptionKey}
* and no password if none set there)
* @since 6.0.0
*/
public void setOutputPassword(final char[] password) {
    // Defensive copy: callers commonly wipe password arrays once they have handed them
    // over; keeping the caller's reference would make a later write use the wiped value.
    this.outputPasswordChars = password == null ? null : password.clone();
}
/** /**
* Method getBytes - get the bytes of just the HSSF portions of the XLS file. * Method getBytes - get the bytes of just the HSSF portions of the XLS file.
@ -1496,7 +1593,14 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
HSSFSheet[] sheets = getSheets(); HSSFSheet[] sheets = getSheets();
int nSheets = sheets.length; int nSheets = sheets.length;
updateEncryptionInfo(); String pwdString;
if (outputPasswordChars != null) {
pwdString = new String(outputPasswordChars);
} else {
// from POI 6.0.0, using Biff8EncryptionKey is discouraged
pwdString = Biff8EncryptionKey.getCurrentUserPassword();
}
updateEncryptionInfo(pwdString);
// before getting the workbook size we must tell the sheets that // before getting the workbook size we must tell the sheets that
// serialization is about to occur. // serialization is about to occur.
@ -2329,12 +2433,11 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
} }
private void updateEncryptionInfo() { private void updateEncryptionInfo(String password) {
// make sure, that we've read all the streams ... // make sure, that we've read all the streams ...
readProperties(); readProperties();
FilePassRecord fpr = (FilePassRecord) workbook.findFirstRecordBySid(FilePassRecord.sid); FilePassRecord fpr = (FilePassRecord) workbook.findFirstRecordBySid(FilePassRecord.sid);
String password = Biff8EncryptionKey.getCurrentUserPassword();
WorkbookRecordList wrl = workbook.getWorkbookRecordList(); WorkbookRecordList wrl = workbook.getWorkbookRecordList();
if (password == null) { if (password == null) {
if (fpr != null) { if (fpr != null) {

View File

@ -21,7 +21,6 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -70,18 +69,8 @@ public class HSSFWorkbookFactory implements WorkbookProvider {
@SuppressWarnings("java:S2093") @SuppressWarnings("java:S2093")
@Override @Override
public HSSFWorkbook create(final DirectoryNode root, String password) throws IOException { public HSSFWorkbook create(final DirectoryNode root, String password) throws IOException {
boolean passwordSet = false; final char[] passwordChars = password == null ? null : password.toCharArray();
if (password != null) { return new HSSFWorkbook(root, true, passwordChars);
Biff8EncryptionKey.setCurrentUserPassword(password);
passwordSet = true;
}
try {
return new HSSFWorkbook(root, true);
} finally {
if (passwordSet) {
Biff8EncryptionKey.setCurrentUserPassword(null);
}
}
} }
@Override @Override
@ -98,24 +87,14 @@ public class HSSFWorkbookFactory implements WorkbookProvider {
@Override @Override
@SuppressWarnings({"java:S2095","java:S2093"}) @SuppressWarnings({"java:S2095","java:S2093"})
public Workbook create(File file, String password, boolean readOnly) throws IOException { public Workbook create(File file, String password, boolean readOnly) throws IOException {
boolean passwordSet = false; POIFSFileSystem fs = new POIFSFileSystem(file, readOnly);
if (password != null) {
Biff8EncryptionKey.setCurrentUserPassword(password);
passwordSet = true;
}
try { try {
POIFSFileSystem fs = new POIFSFileSystem(file, readOnly); final char[] passwordChars = password == null ? null : password.toCharArray();
try { return new HSSFWorkbook(fs, true, passwordChars);
return new HSSFWorkbook(fs, true); } catch (RuntimeException e) {
} catch (RuntimeException e) { // we need to close the filesystem if we encounter an exception to not leak file handles
// we need to close the filesystem if we encounter an exception to not leak file handles fs.close();
fs.close(); throw e;
throw e;
}
} finally {
if (passwordSet) {
Biff8EncryptionKey.setCurrentUserPassword(null);
}
} }
} }
} }

View File

@ -49,6 +49,15 @@ public final class HSSFTestDataSamples {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
/**
* Opens the named sample resource and reads it as an {@link HSSFWorkbook}, supplying
* the given password to the workbook constructor.
*
* @param sampleFileName name of the sample resource to open
* @param password the password in char array format, passed to the workbook constructor
* @return the workbook read from the sample resource
* @throws RuntimeException wrapping any {@link IOException} raised while reading
*/
public static HSSFWorkbook openSampleWorkbook(String sampleFileName, char[] password) {
    try (InputStream sampleData = _inst.openResourceAsStream(sampleFileName)) {
        final HSSFWorkbook workbook = new HSSFWorkbook(sampleData, password);
        return workbook;
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
/** /**
* Writes a spreadsheet to a {@code ByteArrayOutputStream} and reads it back * Writes a spreadsheet to a {@code ByteArrayOutputStream} and reads it back
* from a {@code ByteArrayInputStream}.<p> * from a {@code ByteArrayInputStream}.<p>

View File

@ -46,6 +46,13 @@ final class TestExcelExtractor {
return new ExcelExtractor(fs); return new ExcelExtractor(fs);
} }
/**
* Opens the named sample file and creates an {@link ExcelExtractor} for it, supplying
* the given password.
*
* @param sampleFileName name of the sample file to open
* @param password the password, converted to a char array ({@code null} stays {@code null})
* @return an extractor over the opened filesystem
* @throws IOException if the file cannot be opened or read
*/
private static ExcelExtractor createExtractor(String sampleFileName, String password) throws IOException {
    File file = HSSFTestDataSamples.getSampleFile(sampleFileName);
    final char[] passwordChars = password == null ? null : password.toCharArray();
    POIFSFileSystem fs = new POIFSFileSystem(file);
    try {
        return new ExcelExtractor(fs, passwordChars);
    } catch (IOException | RuntimeException e) {
        // the filesystem was opened on a File; close it if the extractor could not be
        // created (e.g. wrong password) so that the file handle is not leaked
        try {
            fs.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
@Test @Test
void testSimple() throws IOException { void testSimple() throws IOException {
try (ExcelExtractor extractor = createExtractor("Simple.xls")) { try (ExcelExtractor extractor = createExtractor("Simple.xls")) {
@ -335,6 +342,14 @@ final class TestExcelExtractor {
@Test @Test
void testPassword() throws IOException { void testPassword() throws IOException {
try (ExcelExtractor extractor = createExtractor("password.xls", "password")) {
String text = extractor.getText();
assertContains(text, "ZIP");
}
}
@Test
void testPasswordWithBiff8EncryptionKey() throws IOException {
Biff8EncryptionKey.setCurrentUserPassword("password"); Biff8EncryptionKey.setCurrentUserPassword("password");
try (ExcelExtractor extractor = createExtractor("password.xls")) { try (ExcelExtractor extractor = createExtractor("password.xls")) {
String text = extractor.getText(); String text = extractor.getText();

View File

@ -1249,6 +1249,44 @@ public final class TestHSSFWorkbook extends BaseTestWorkbook {
} }
} }
/**
* Opens the sample workbook with the password passed as a char array, then writes it
* out without setting an output password and reads the result back without a password.
*/
@Test
void testPassword() throws Exception {
try (HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook(
"xor-encryption-abc.xls", "abc".toCharArray())) {
validateXorEncryptionDoc(wb);
try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) {
// verify that no password is applied on write: the output is read back
// below without supplying a password
wb.write(baos);
try (HSSFWorkbook wbOut = new HSSFWorkbook(baos.toInputStream())) {
validateXorEncryptionDoc(wbOut);
}
}
}
}
/**
* Opens the sample workbook with its original password, sets a different output
* password, writes the workbook and reads the result back using the new password.
*/
@Test
void testChangePassword() throws Exception {
try (HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook(
"xor-encryption-abc.xls", "abc".toCharArray())) {
validateXorEncryptionDoc(wb);
String newPassword = "newPassword";
try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) {
// verify that newPassword is applied on write: the output is read back
// below with newPassword
wb.setOutputPassword(newPassword.toCharArray());
wb.write(baos);
try (HSSFWorkbook wbOut = new HSSFWorkbook(baos.toInputStream(), newPassword.toCharArray())) {
validateXorEncryptionDoc(wbOut);
}
}
}
}
/**
* Asserts that the cell in the first row and first column of the first sheet holds
* the numeric value 1.0.
*
* @param wb the workbook to check
*/
private void validateXorEncryptionDoc(HSSFWorkbook wb) {
    final double firstCellValue = wb.getSheetAt(0)
            .getRow(0)
            .getCell(0)
            .getNumericCellValue();
    assertEquals(1.0, firstCellValue);
}
private static class WrappedStream extends FilterInputStream { private static class WrappedStream extends FilterInputStream {
private boolean closed; private boolean closed;