diff --git a/poi/src/main/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/poi/src/main/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index fc0e7aa43c..02315ffb29 100644 --- a/poi/src/main/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/poi/src/main/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -72,10 +72,24 @@ public class ExcelExtractor implements POIOLE2TextExtractor, org.apache.poi.ss.e this(fs.getRoot()); } + /** + * @since 6.0.0 + */ + public ExcelExtractor(POIFSFileSystem fs, char[] password) throws IOException { + this(fs.getRoot(), password); + } + public ExcelExtractor(DirectoryNode dir) throws IOException { this(new HSSFWorkbook(dir, true)); } + /** + * @since 6.0.0 + */ + public ExcelExtractor(DirectoryNode dir, char[] password) throws IOException { + this(new HSSFWorkbook(dir, true, password)); + } + private static final class CommandParseException extends Exception { public CommandParseException(String msg) { super(msg); diff --git a/poi/src/main/java/org/apache/poi/hssf/record/RecordFactory.java b/poi/src/main/java/org/apache/poi/hssf/record/RecordFactory.java index 08b49f5be4..f0f1c42f82 100644 --- a/poi/src/main/java/org/apache/poi/hssf/record/RecordFactory.java +++ b/poi/src/main/java/org/apache/poi/hssf/record/RecordFactory.java @@ -172,19 +172,31 @@ public final class RecordFactory { * Create an array of records from an input stream * * @param in the InputStream from which the records will be obtained - * * @return a list of Records created from the InputStream - * * @throws org.apache.poi.util.RecordFormatException on error processing the InputStream */ public static List createRecords(InputStream in) throws RecordFormatException { + return createRecords(in, null); + } + + /** + * Create an array of records from an input stream + * + * @param in the InputStream from which the records will be obtained + * @param password in char array format (can be null) + * @return a list of Records created from the 
InputStream + * @throws org.apache.poi.util.RecordFormatException on error processing the InputStream + * @since 6.0.0 + */ + public static List createRecords( + InputStream in, char[] password) throws RecordFormatException { List records = new ArrayList<>(NUM_RECORDS); - RecordFactoryInputStream recStream = new RecordFactoryInputStream(in, true); + RecordFactoryInputStream recStream = new RecordFactoryInputStream(in, true, password); Record record; - while ((record = recStream.nextRecord())!=null) { + while ((record = recStream.nextRecord()) != null) { records.add(record); IOUtils.safelyAllocateCheck(records.size(), MAX_NUMBER_OF_RECORDS); @@ -192,4 +204,5 @@ public final class RecordFactory { return records; } + } diff --git a/poi/src/main/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java b/poi/src/main/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java index 46077788b4..a5cedea15d 100644 --- a/poi/src/main/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java +++ b/poi/src/main/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java @@ -102,19 +102,39 @@ public final class RecordFactoryInputStream { _lastRecord = rec; } + /** + * This method requires that you store the password in {@link Biff8EncryptionKey}. + * Since 6.0.0, we have overloaded methods where you can pass the password as a param instead. + */ @SuppressWarnings({"squid:S2068"}) public RecordInputStream createDecryptingStream(InputStream original) { - String userPassword = Biff8EncryptionKey.getCurrentUserPassword(); - if (userPassword == null) { - userPassword = Decryptor.DEFAULT_PASSWORD; - } + return createDecryptingStream(original, (String) null); + } + /** + * @since 6.0.0 + */ + public RecordInputStream createDecryptingStream(InputStream original, char[] password) { + final String pwdString = password == null ? 
null : new String(password); + return createDecryptingStream(original, pwdString); + } + + /** + * @since 6.0.0 + */ + public RecordInputStream createDecryptingStream(InputStream original, String password) { + if (password == null) { + password = Biff8EncryptionKey.getCurrentUserPassword(); + if (password == null) { + password = Decryptor.DEFAULT_PASSWORD; + } + } EncryptionInfo info = _filePassRec.getEncryptionInfo(); try { - if (!info.getDecryptor().verifyPassword(userPassword)) { + if (!info.getDecryptor().verifyPassword(password)) { throw new EncryptedDocumentException( - (Decryptor.DEFAULT_PASSWORD.equals(userPassword) ? "Default" : "Supplied") - + " password is invalid for salt/verifier/verifierHash"); + (Decryptor.DEFAULT_PASSWORD.equals(password) ? "Default" : "Supplied") + + " password is invalid for salt/verifier/verifierHash"); } } catch (GeneralSecurityException e) { throw new EncryptedDocumentException(e); @@ -176,17 +196,29 @@ public final class RecordFactoryInputStream { /** * @param in the InputStream to read from - * * @param shouldIncludeContinueRecords caller can pass false if loose * {@link ContinueRecord}s should be skipped (this is sometimes useful in event based * processing). */ public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRecords) { + this(in, shouldIncludeContinueRecords, null); + } + + /** + * @param in the InputStream to read from + * @param shouldIncludeContinueRecords caller can pass false if loose + * {@link ContinueRecord}s should be skipped (this is sometimes useful in event based + * processing). + * @param password password in char array format (can be null) 
+ * @since 6.0.0 + */ + public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRecords, + char[] password) { RecordInputStream rs = new RecordInputStream(in); List records = new ArrayList<>(); StreamEncryptionInfo sei = new StreamEncryptionInfo(rs, records); if (sei.hasEncryption()) { - rs = sei.createDecryptingStream(in); + rs = sei.createDecryptingStream(in, password); } else { // typical case - non-encrypted stream } @@ -201,22 +233,22 @@ public final class RecordFactoryInputStream { _lastRecord = sei.getLastRecord(); /* - * How to recognise end of stream? - * In the best case, the underlying input stream (in) ends just after the last EOF record - * Usually however, the stream is padded with an arbitrary byte count. Excel and most apps - * reliably use zeros for padding and if this were always the case, this code could just - * skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with - * non-zero padding that is read OK by Excel (Excel also fixes the padding). - * - * So to properly detect the workbook end of stream, this code has to identify the last - * EOF record. This is not so easy because the worbook bof+eof pair do not bracket the - * whole stream. The worksheets follow the workbook, but it is not easy to tell how many - * sheet sub-streams should be present. Hence we are looking for an EOF record that is not - * immediately followed by a BOF record. One extra complication is that bof+eof sub- - * streams can be nested within worksheet streams and it's not clear in these cases what - * record might follow any EOF record. So we also need to keep track of the bof/eof - * nesting level. - */ + * How to recognise end of stream? + * In the best case, the underlying input stream (in) ends just after the last EOF record + * Usually however, the stream is padded with an arbitrary byte count. 
Excel and most apps + * reliably use zeros for padding and if this were always the case, this code could just + * skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with + * non-zero padding that is read OK by Excel (Excel also fixes the padding). + * + * So to properly detect the workbook end of stream, this code has to identify the last + * EOF record. This is not so easy because the workbook bof+eof pair do not bracket the + * whole stream. The worksheets follow the workbook, but it is not easy to tell how many + * sheet sub-streams should be present. Hence we are looking for an EOF record that is not + * immediately followed by a BOF record. One extra complication is that bof+eof sub- + * streams can be nested within worksheet streams and it's not clear in these cases what + * record might follow any EOF record. So we also need to keep track of the bof/eof + * nesting level. + */ _bofDepth = sei.hasBOFRecord() ? 1 : 0; _lastRecordWasEOFLevelZero = false; } diff --git a/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index 1d2506b5bb..21cbb9ca6d 100644 --- a/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -205,6 +205,12 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { */ private final UDFFinder _udfFinder = new IndexedUDFFinder(AggregatingUDFFinder.DEFAULT); + /** + * The password used to encrypt this workbook when writing out. + * @since 6.0.0 + */ + private char[] outputPasswordChars; + public static HSSFWorkbook create(InternalWorkbook book) { return new HSSFWorkbook(book); } @@ -287,6 +293,27 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { this(fs.getRoot(), fs, preserveNodes); } + /** + * Given a POI POIFSFileSystem object, read in its Workbook and populate + * the high and low level models. 
If you're reading in a workbook... start here! + * + * @param fs the POI filesystem that contains the Workbook stream. + * @param preserveNodes whether to preserve other nodes, such as + * macros. This takes more memory, so only say yes if you + * need to. If set, will store all of the POIFSFileSystem + * in memory + * @param password in char array format (can be null) + * @throws IOException if the stream cannot be read + * @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the + * input format + * @since 6.0.0 + * @see POIFSFileSystem + */ + public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes, char[] password) + throws IOException { + this(fs.getRoot(), preserveNodes, password); + } + public static String getWorkbookDirEntryName(DirectoryNode directory) { if (directory.hasEntryCaseInsensitive(WORKBOOK)) { return WORKBOOK; @@ -359,6 +386,28 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { */ public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes) throws IOException { + this(directory, preserveNodes, null); + } + + /** + * given a POI POIFSFileSystem object, and a specific directory + * within it, read in its Workbook and populate the high and + * low level models. If you're reading in a workbook...start here. + * + * @param directory the POI filesystem directory to process from + * @param preserveNodes whether to preserve other nodes, such as + * macros. This takes more memory, so only say yes if you + * need to. 
If set, will store all of the POIFSFileSystem + * in memory + * @param password in char array format (can be null) + * @throws IOException if the stream cannot be read + * @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the + * input format + * @see POIFSFileSystem + * @since 6.0.0 + */ + public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes, char[] password) + throws IOException { super(directory); String workbookName = getWorkbookDirEntryName(directory); @@ -377,7 +426,7 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { // it happens to be spelled. InputStream stream = directory.createDocumentInputStream(workbookName); - List records = RecordFactory.createRecords(stream); + List records = RecordFactory.createRecords(stream, password); workbook = InternalWorkbook.createWorkbook(records); setPropertiesFromWorkbook(workbook); @@ -420,6 +469,24 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { this(s, true); } + /** + * Companion to HSSFWorkbook(POIFSFileSystem), this constructs the + * POI filesystem around your {@link InputStream}, including all nodes. + *

This calls {@link #HSSFWorkbook(InputStream, boolean, char[])} with + * preserve nodes set to true. + * + * @throws IOException if the stream cannot be read + * @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the + * input format + * @see #HSSFWorkbook(InputStream, boolean, char[]) + * @see #HSSFWorkbook(POIFSFileSystem) + * @see POIFSFileSystem + * @since 6.0.0 + */ + public HSSFWorkbook(InputStream s, char[] password) throws IOException { + this(s, true, password); + } + /** * Companion to HSSFWorkbook(POIFSFileSystem), this constructs the * POI filesystem around your {@link InputStream}. @@ -440,10 +507,31 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { this(new POIFSFileSystem(s).getRoot(), preserveNodes); } + /** + * Companion to HSSFWorkbook(POIFSFileSystem), this constructs the + * POI filesystem around your {@link InputStream}. + * + * @param s the POI filesystem that contains the Workbook stream. + * @param preserveNodes whether to preserve other nodes, such as + * macros. This takes more memory, so only say yes if you + * need to. + * @param password in char array format (can be null) + * @throws IOException if the stream cannot be read + * @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the + * input format + * @see POIFSFileSystem + * @see #HSSFWorkbook(POIFSFileSystem) + * @since 6.0.0 + */ + @SuppressWarnings("resource") // POIFSFileSystem always closes the stream + public HSSFWorkbook(InputStream s, boolean preserveNodes, char[] password) + throws IOException { + this(new POIFSFileSystem(s).getRoot(), preserveNodes, password); + } + /** * used internally to set the workbook properties. 
*/ - private void setPropertiesFromWorkbook(InternalWorkbook book) { this.workbook = book; @@ -1482,6 +1570,15 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { } } + /** + * Set the password to be used to password protect the spreadsheet when we output the data. + * @param password as a char array (null is supported and means use {@link Biff8EncryptionKey} + * and no password if none set there) + * @since 6.0.0 + */ + public void setOutputPassword(final char[] password) { + this.outputPasswordChars = password; + } /** * Method getBytes - get the bytes of just the HSSF portions of the XLS file. @@ -1496,7 +1593,14 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { HSSFSheet[] sheets = getSheets(); int nSheets = sheets.length; - updateEncryptionInfo(); + String pwdString; + if (outputPasswordChars != null) { + pwdString = new String(outputPasswordChars); + } else { + // from POI 6.0.0, using Biff8EncryptionKey is discouraged + pwdString = Biff8EncryptionKey.getCurrentUserPassword(); + } + updateEncryptionInfo(pwdString); // before getting the workbook size we must tell the sheets that // serialization is about to occur. @@ -2329,12 +2433,11 @@ public final class HSSFWorkbook extends POIDocument implements Workbook { } - private void updateEncryptionInfo() { + private void updateEncryptionInfo(String password) { // make sure, that we've read all the streams ... 
readProperties(); FilePassRecord fpr = (FilePassRecord) workbook.findFirstRecordBySid(FilePassRecord.sid); - String password = Biff8EncryptionKey.getCurrentUserPassword(); WorkbookRecordList wrl = workbook.getWorkbookRecordList(); if (password == null) { if (fpr != null) { diff --git a/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbookFactory.java b/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbookFactory.java index bda0dd6e8d..5dd5585130 100644 --- a/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbookFactory.java +++ b/poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbookFactory.java @@ -21,7 +21,6 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; -import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -70,18 +69,8 @@ public class HSSFWorkbookFactory implements WorkbookProvider { @SuppressWarnings("java:S2093") @Override public HSSFWorkbook create(final DirectoryNode root, String password) throws IOException { - boolean passwordSet = false; - if (password != null) { - Biff8EncryptionKey.setCurrentUserPassword(password); - passwordSet = true; - } - try { - return new HSSFWorkbook(root, true); - } finally { - if (passwordSet) { - Biff8EncryptionKey.setCurrentUserPassword(null); - } - } + final char[] passwordChars = password == null ? 
null : password.toCharArray(); + return new HSSFWorkbook(root, true, passwordChars); } @Override @@ -98,24 +87,14 @@ public class HSSFWorkbookFactory implements WorkbookProvider { @Override @SuppressWarnings({"java:S2095","java:S2093"}) public Workbook create(File file, String password, boolean readOnly) throws IOException { - boolean passwordSet = false; - if (password != null) { - Biff8EncryptionKey.setCurrentUserPassword(password); - passwordSet = true; - } + POIFSFileSystem fs = new POIFSFileSystem(file, readOnly); try { - POIFSFileSystem fs = new POIFSFileSystem(file, readOnly); - try { - return new HSSFWorkbook(fs, true); - } catch (RuntimeException e) { - // we need to close the filesystem if we encounter an exception to not leak file handles - fs.close(); - throw e; - } - } finally { - if (passwordSet) { - Biff8EncryptionKey.setCurrentUserPassword(null); - } + final char[] passwordChars = password == null ? null : password.toCharArray(); + return new HSSFWorkbook(fs, true, passwordChars); + } catch (RuntimeException e) { + // we need to close the filesystem if we encounter an exception to not leak file handles + fs.close(); + throw e; } } } diff --git a/poi/src/test/java/org/apache/poi/hssf/HSSFTestDataSamples.java b/poi/src/test/java/org/apache/poi/hssf/HSSFTestDataSamples.java index 32fdc0fc52..74b8277c89 100644 --- a/poi/src/test/java/org/apache/poi/hssf/HSSFTestDataSamples.java +++ b/poi/src/test/java/org/apache/poi/hssf/HSSFTestDataSamples.java @@ -49,6 +49,15 @@ public final class HSSFTestDataSamples { throw new RuntimeException(e); } } + + public static HSSFWorkbook openSampleWorkbook(String sampleFileName, char[] password) { + try (InputStream stream = _inst.openResourceAsStream(sampleFileName)){ + return new HSSFWorkbook(stream, password); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + /** * Writes a spreadsheet to a {@code ByteArrayOutputStream} and reads it back * from a {@code ByteArrayInputStream}.

diff --git a/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 8e27789f97..a9d3748997 100644 --- a/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/poi/src/test/java/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -46,6 +46,13 @@ final class TestExcelExtractor { return new ExcelExtractor(fs); } + private static ExcelExtractor createExtractor(String sampleFileName, String password) throws IOException { + File file = HSSFTestDataSamples.getSampleFile(sampleFileName); + POIFSFileSystem fs = new POIFSFileSystem(file); + final char[] passwordChars = password == null ? null : password.toCharArray(); + return new ExcelExtractor(fs, passwordChars); + } + @Test void testSimple() throws IOException { try (ExcelExtractor extractor = createExtractor("Simple.xls")) { @@ -335,6 +342,14 @@ final class TestExcelExtractor { @Test void testPassword() throws IOException { + try (ExcelExtractor extractor = createExtractor("password.xls", "password")) { + String text = extractor.getText(); + assertContains(text, "ZIP"); + } + } + + @Test + void testPasswordWithBiff8EncryptionKey() throws IOException { Biff8EncryptionKey.setCurrentUserPassword("password"); try (ExcelExtractor extractor = createExtractor("password.xls")) { String text = extractor.getText(); diff --git a/poi/src/test/java/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java b/poi/src/test/java/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java index 9d99627674..2b9452ccf4 100644 --- a/poi/src/test/java/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java +++ b/poi/src/test/java/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java @@ -1249,6 +1249,44 @@ public final class TestHSSFWorkbook extends BaseTestWorkbook { } } + @Test + void testPassword() throws Exception { + try (HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook( + "xor-encryption-abc.xls", "abc".toCharArray())) { + 
validateXorEncryptionDoc(wb); + try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) { + // testing that when we write that no password is applied + wb.write(baos); + try (HSSFWorkbook wbOut = new HSSFWorkbook(baos.toInputStream())) { + validateXorEncryptionDoc(wbOut); + } + } + } + } + + @Test + void testChangePassword() throws Exception { + try (HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook( + "xor-encryption-abc.xls", "abc".toCharArray())) { + validateXorEncryptionDoc(wb); + String newPassword = "newPassword"; + try (UnsynchronizedByteArrayOutputStream baos = UnsynchronizedByteArrayOutputStream.builder().get()) { + // testing that when we write that the newPassword is applied + wb.setOutputPassword(newPassword.toCharArray()); + wb.write(baos); + try (HSSFWorkbook wbOut = new HSSFWorkbook(baos.toInputStream(), newPassword.toCharArray())) { + validateXorEncryptionDoc(wbOut); + } + } + } + } + + private void validateXorEncryptionDoc(HSSFWorkbook wb) { + HSSFSheet sheet = wb.getSheetAt(0); + double value = sheet.getRow(0).getCell(0).getNumericCellValue(); + assertEquals(1.0, value); + } + private static class WrappedStream extends FilterInputStream { private boolean closed;