diff --git a/poi-ooxml/src/main/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java b/poi-ooxml/src/main/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java index 7a24234046..24676ac7b9 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java +++ b/poi-ooxml/src/main/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java @@ -23,6 +23,8 @@ import java.util.Queue; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.usermodel.ExcelNumberFormat; +import org.apache.poi.ss.usermodel.FormulaError; import org.apache.poi.ss.usermodel.RichTextString; import org.apache.poi.ss.util.CellAddress; import org.apache.poi.util.Internal; @@ -41,10 +43,9 @@ public class XSSFBSheetHandler extends XSSFBParser { private static final int CHECK_ALL_ROWS = -1; private final SharedStrings stringsTable; - private final XSSFSheetXMLHandler.SheetContentsHandler handler; + private final XSSFBSheetContentsHandler handler; private final XSSFBStylesTable styles; private final XSSFBCommentsTable comments; - private final DataFormatter dataFormatter; private final boolean formulasNotResults;//TODO: implement this private int lastEndedRow = -1; @@ -55,6 +56,51 @@ public class XSSFBSheetHandler extends XSSFBParser { private StringBuilder xlWideStringBuffer = new StringBuilder(); private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader(); + + /** + * Creates a handler that forwards native POI cell types to the supplied {@link + * XSSFBSheetContentsHandler}. + * + *
Select this overload when the consumer expects the raw cell representation rather than + * formatted strings. + * + * @param is XLSB worksheet stream to parse + * @param styles table providing cell style and number format metadata + * @param comments optional comments table, may be {@code null} + * @param strings shared strings table used by the sheet + * @param sheetContentsHandler callback receiving native cell events + * @since POI 5.5.0 + */ + public XSSFBSheetHandler(InputStream is, + XSSFBStylesTable styles, + XSSFBCommentsTable comments, + SharedStrings strings, + XSSFBSheetContentsHandler sheetContentsHandler, + boolean formulasNotResults) { + super(is); + this.styles = styles; + this.comments = comments; + this.stringsTable = strings; + this.handler = sheetContentsHandler; + this.formulasNotResults = formulasNotResults; + } + + /** + * Creates a handler that converts numeric and date cells to formatted strings via {@link + * DataFormatter}. + * + *
Select this overload when the consumer expects formatted string values rather than raw + * cell representations. + * + * @param is XLSB worksheet stream to parse + * @param styles table providing cell style and number format metadata + * @param comments optional comments table, may be {@code null} + * @param strings shared strings table used by the sheet + * @param sheetContentsHandler callback receiving formatted string values + * @param dataFormatter formatter applied to numeric and date cells + * @see #XSSFBSheetHandler(InputStream, XSSFBStylesTable, XSSFBCommentsTable, SharedStrings, + * XSSFBSheetContentsHandler, boolean) + */ public XSSFBSheetHandler(InputStream is, XSSFBStylesTable styles, XSSFBCommentsTable comments, @@ -66,11 +112,18 @@ public class XSSFBSheetHandler extends XSSFBParser { this.styles = styles; this.comments = comments; this.stringsTable = strings; - this.handler = sheetContentsHandler; - this.dataFormatter = dataFormatter; + this.handler = new XSSFBSheetContentsHandlerWrapper(sheetContentsHandler, dataFormatter); this.formulasNotResults = formulasNotResults; } + /** + * Dispatches a parsed XLSB record to the appropriate specialised handler. 
+ * + * @param id numeric record identifier supplied by {@link XSSFBParser} + * @param data raw record payload + * @throws XSSFBParseException if the record cannot be processed according to the XLSB spec + * @see XSSFBRecordType + */ @Override public void handleRecord(int id, byte[] data) throws XSSFBParseException { XSSFBRecordType type = XSSFBRecordType.lookup(id); @@ -133,86 +186,117 @@ public class XSSFBSheetHandler extends XSSFBParser { checkMissedComments(currentRow, cellBuffer.getColNum()); } - private void handleCellValue(String formattedValue) { - CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum()); + private void handleStringCellValue(String val) { + CellAddress cellAddress = getCellAddress(); + XSSFBComment comment = getCellComment(cellAddress); + handler.stringCell(cellAddress.formatAsString(), val, comment); + } + + private void handleDoubleCellValue(double val) { + CellAddress cellAddress = getCellAddress(); + XSSFBComment comment = getCellComment(cellAddress); + ExcelNumberFormat nf = getExcelNumberFormat(); + handler.doubleCell(cellAddress.formatAsString(), val, comment, nf); + } + + private void handleErrorCellValue(int val) { + FormulaError fe; + try { + fe = FormulaError.forInt(val); + } catch (IllegalArgumentException e) { + fe = null; + } + CellAddress cellAddress = getCellAddress(); + XSSFBComment comment = getCellComment(cellAddress); + handler.errorCell(cellAddress.formatAsString(), fe, comment); + } + + private CellAddress getCellAddress() { + return new CellAddress(currentRow, cellBuffer.getColNum()); + } + + private XSSFBComment getCellComment(CellAddress cellAddress) { XSSFBComment comment = null; if (comments != null) { comment = comments.get(cellAddress); } - handler.cell(cellAddress.formatAsString(), formattedValue, comment); + return comment; + } + + private ExcelNumberFormat getExcelNumberFormat() { + int styleIdx = cellBuffer.getStyleIdx(); + String formatString = styles.getNumberFormatString(styleIdx); + 
short styleIndex = styles.getNumberFormatIndex(styleIdx); + // for now, if formatString is null, silently punt + // and use "General". Not the best behavior, + // but we're doing it now in the streaming and non-streaming + // extractors for xlsx. See BUG-61053 + if (formatString == null) { + formatString = BuiltinFormats.getBuiltinFormat(0); + styleIndex = 0; + } + return new ExcelNumberFormat(styleIndex, formatString); } private void handleFmlaNum(byte[] data) { beforeCellValue(data); //xNum double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); - handleCellValue(formatVal(val, cellBuffer.getStyleIdx())); + handleDoubleCellValue(val); } private void handleCellSt(byte[] data) { beforeCellValue(data); xlWideStringBuffer.setLength(0); XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); - handleCellValue(xlWideStringBuffer.toString()); + handleStringCellValue(xlWideStringBuffer.toString()); } private void handleFmlaString(byte[] data) { beforeCellValue(data); xlWideStringBuffer.setLength(0); XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); - handleCellValue(xlWideStringBuffer.toString()); + handleStringCellValue(xlWideStringBuffer.toString()); } private void handleCellError(byte[] data) { beforeCellValue(data); - //TODO, read byte to figure out the type of error - handleCellValue("ERROR"); + int val = data[XSSFBCellHeader.length] & 0xFF; + handleErrorCellValue(val); } private void handleFmlaError(byte[] data) { beforeCellValue(data); - //TODO, read byte to figure out the type of error - handleCellValue("ERROR"); + int val = data[XSSFBCellHeader.length] & 0xFF; + handleErrorCellValue(val); } private void handleBoolean(byte[] data) { beforeCellValue(data); - String formattedVal = (data[XSSFBCellHeader.length] == 1) ? 
"TRUE" : "FALSE"; - handleCellValue(formattedVal); + boolean val = data[XSSFBCellHeader.length] == 1; + CellAddress cellAddress = getCellAddress(); + XSSFBComment comment = getCellComment(cellAddress); + handler.booleanCell(cellAddress.formatAsString(), val, comment); } private void handleCellReal(byte[] data) { beforeCellValue(data); //xNum double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); - handleCellValue(formatVal(val, cellBuffer.getStyleIdx())); + handleDoubleCellValue(val); } private void handleCellRk(byte[] data) { beforeCellValue(data); double val = rkNumber(data, XSSFBCellHeader.length); - handleCellValue(formatVal(val, cellBuffer.getStyleIdx())); - } - - private String formatVal(double val, int styleIdx) { - String formatString = styles.getNumberFormatString(styleIdx); - short styleIndex = styles.getNumberFormatIndex(styleIdx); - //for now, if formatString is null, silently punt - //and use "General". Not the best behavior, - //but we're doing it now in the streaming and non-streaming - //extractors for xlsx. 
See BUG-61053 - if (formatString == null) { - formatString = BuiltinFormats.getBuiltinFormat(0); - styleIndex = 0; - } - return dataFormatter.formatRawCellContents(val, styleIndex, formatString); + handleDoubleCellValue(val); } private void handleBrtCellIsst(byte[] data) { beforeCellValue(data); int idx = XSSFBUtils.castToInt(LittleEndian.getUInt(data, XSSFBCellHeader.length)); RichTextString rtss = stringsTable.getItemAt(idx); - handleCellValue(rtss.getString()); + handleStringCellValue(rtss.getString()); } @@ -300,7 +384,7 @@ public class XSSFBSheetHandler extends XSSFBParser { } private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) { - handler.cell(cellAddress.formatAsString(), null, comment); + handler.stringCell(cellAddress.formatAsString(), null, comment); } private double rkNumber(byte[] data, int offset) { @@ -326,6 +410,174 @@ public class XSSFBSheetHandler extends XSSFBParser { return d; } + /** + * Receives streaming callbacks while {@link XSSFBSheetHandler} parses an XLSB sheet. + * + * @see XSSFBSheetHandler + * @since POI 5.5.0 + */ + public interface XSSFBSheetContentsHandler { + /** + * Signals that a row has started before any of its cells are delivered. + * + * @param rowNum zero-based row index + * @see #endRow(int) + */ + void startRow(int rowNum); + + /** + * Signals that a row has ended after all of its cells and comments were processed. + * + * @param rowNum zero-based row index + * @see #startRow(int) + */ + void endRow(int rowNum); + + /** + * Handles a cell that resolves to a string value, possibly representing a comment-only cell. + * + * @param cellReference A1-style cell address + * @param value string contents, or {@code null} if only a comment is present + * @param comment associated comment, or {@code null} if absent + *
+ * <p>Sheets that have missing or empty cells may result in sparse calls to the cell callbacks.
+ * See {@code poi-examples/src/main/java/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java} for an
+ * example of how to handle this scenario.
+ * @see #doubleCell(String, double, XSSFComment, ExcelNumberFormat)
+ */
+ void stringCell(String cellReference, String value, XSSFComment comment);
+
+ /**
+ * Handles a numeric cell while providing the corresponding {@link ExcelNumberFormat}.
+ *
+ * <p>Sheets that have missing or empty cells may result in sparse calls to this method. See
+ * {@code poi-examples/src/main/java/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java} for an
+ * example of how to handle this scenario.
+ *
+ * @param cellReference A1-style cell address
+ * @param value numeric value extracted from the sheet
+ * @param comment associated comment, or {@code null} if absent
+ * @param nf number format describing how the value should be rendered
+ * @see #stringCell(String, String, XSSFComment)
+ */
+ void doubleCell(String cellReference, double value, XSSFComment comment, ExcelNumberFormat nf);
+
+ /**
+ * Handles a boolean cell.
+ *
+ * <p>Sheets that have missing or empty cells may result in sparse calls to this method. See
+ * {@code poi-examples/src/main/java/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java} for an
+ * example of how to handle this scenario.
+ *
+ * @param cellReference A1-style cell address
+ * @param value boolean value stored in the cell
+ * @param comment associated comment, or {@code null} if absent
+ * @see #stringCell(String, String, XSSFComment)
+ */
+ void booleanCell(String cellReference, boolean value, XSSFComment comment);
+
+ /**
+ * Handles a cell that evaluates to an error.
+ *
+ * <p>Sheets that have missing or empty cells may result in sparse calls to this method. See
+ * {@code poi-examples/src/main/java/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java} for an
+ * example of how to handle this scenario.
+ *
+ * @param cellReference A1-style cell address
+ * @param fe mapped {@link FormulaError}, or {@code null} when the error code is unknown
+ * @param comment associated comment, or {@code null} if absent
+ * @see FormulaError
+ */
+ void errorCell(String cellReference, FormulaError fe, XSSFComment comment);
+
+ /**
+ * Receives header or footer text; the legacy adapter in this class forwards it unchanged.
+ *
+ * @param text resolved header or footer text
+ * @param isHeader {@code true} when the text belongs to a header, otherwise {@code false}
+ * @param tagName tag the text was read from (exact values are not visible here; confirm)
+ * @see #endSheet()
+ */
+ void headerFooter(String text, boolean isHeader, String tagName);
+
+ /**
+ * Signals that the sheet has been completely processed; the legacy adapter forwards it as-is.
+ *
+ * @see #startRow(int)
+ */
+ void endSheet();
+ }
+
+ /**
+ * Adapts a legacy {@link XSSFSheetXMLHandler.SheetContentsHandler} to {@link
+ * XSSFBSheetContentsHandler} by rendering every typed cell value as a string.
+ *
+ * @see XSSFSheetXMLHandler
+ */
+ private static final class XSSFBSheetContentsHandlerWrapper
+ implements XSSFBSheetContentsHandler {
+ private final XSSFSheetXMLHandler.SheetContentsHandler delegate;
+ private final DataFormatter dataFormatter;
+
+ /**
+ * Creates a wrapper that formats numeric cells and forwards all events to {@code delegate}.
+ *
+ * @param delegate target handler compatible with the XML streaming API
+ * @param dataFormatter formatter used for numeric and date cell rendering
+ */
+ XSSFBSheetContentsHandlerWrapper(
+ XSSFSheetXMLHandler.SheetContentsHandler delegate, DataFormatter dataFormatter) {
+ this.delegate = delegate;
+ this.dataFormatter = dataFormatter;
+ }
+
+ @Override
+ public void startRow(int rowNum) {
+ delegate.startRow(rowNum);
+ }
+
+ @Override
+ public void endRow(int rowNum) {
+ delegate.endRow(rowNum);
+ }
+
+ @Override
+ public void stringCell(String cellReference, String value, XSSFComment comment) {
+ delegate.cell(cellReference, value, comment);
+ }
+
+ @Override
+ public void doubleCell(
+ String cellReference, double value, XSSFComment comment, ExcelNumberFormat nf) {
+ String text = dataFormatter.formatRawCellContents(value, nf.getIdx(), nf.getFormat());
+ delegate.cell(cellReference, text, comment);
+ }
+
+ @Override
+ public void booleanCell(String cellReference, boolean value, XSSFComment comment) {
+ // Keep the upper-case literals the handler emitted before typed callbacks existed;
+ // Boolean.toString would silently change the output to "true"/"false".
+ delegate.cell(cellReference, value ? "TRUE" : "FALSE", comment);
+ }
+
+ @Override
+ public void errorCell(String cellReference, FormulaError fe, XSSFComment comment) {
+ // For backward compatibility the value is always "ERROR"; implement
+ // XSSFBSheetContentsHandler directly if the actual error code is needed.
+ delegate.cell(cellReference, "ERROR", comment);
+ }
+
+ @Override
+ public void headerFooter(String text, boolean isHeader, String tagName) {
+ delegate.headerFooter(text, isHeader, tagName);
+ }
+
+ @Override
+ public void endSheet() {
+ delegate.endSheet();
+ }
+ }
+
/**
* You need to implement this to handle the results
* of the sheet parsing.
diff --git a/poi-ooxml/src/test/java/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java b/poi-ooxml/src/test/java/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
index 3fa96264f0..9ea4652f3e 100644
--- a/poi-ooxml/src/test/java/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
+++ b/poi-ooxml/src/test/java/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
@@ -17,10 +17,6 @@
package org.apache.poi.xssf.eventusermodel;
-import static org.apache.poi.POITestCase.assertContains;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
@@ -28,11 +24,29 @@ import java.util.List;
import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.ExcelNumberFormat;
+import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.ArgumentMatcher;
+import org.mockito.InOrder;
+import org.mockito.quality.Strictness;
+
+import static org.apache.poi.POITestCase.assertContains;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.argThat;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.isNull;
+import static org.mockito.ArgumentMatchers.notNull;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.withSettings;
class TestXSSFBReader {
@@ -216,4 +230,234 @@ class TestXSSFBReader {
return sb.toString();
}
}
+
+ private static XSSFBSheetHandler.XSSFBSheetContentsHandler mockSheetContentsHandler() {
+ final Class<XSSFBSheetHandler.XSSFBSheetContentsHandler> handlerType =
+ XSSFBSheetHandler.XSSFBSheetContentsHandler.class; // interface being mocked
+ return mock(handlerType, withSettings().strictness(Strictness.STRICT_STUBS)); // strict stubs
+ }
+
+ private static ArgumentMatcher