diff --git a/poi-ooxml/src/main/java/org/apache/poi/xssf/usermodel/XSSFOptimiser.java b/poi-ooxml/src/main/java/org/apache/poi/xssf/usermodel/XSSFOptimiser.java new file mode 100755 index 0000000000..9d2ad208b6 --- /dev/null +++ b/poi-ooxml/src/main/java/org/apache/poi/xssf/usermodel/XSSFOptimiser.java @@ -0,0 +1,249 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.usermodel; + +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Color; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.xssf.model.StylesTable; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCellXfs; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTStylesheet; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTXf; + +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Locale; +import java.util.Map; + +/** + * XSSF equivalent of HSSFOptimiser. + * + *

Provides methods to optimise cell styles and fonts in XSSF workbooks + * by remapping cells that use duplicate styles/fonts to use canonical + * (first) occurrences.

+ * + *

This helps avoid "style explosion" issues where Excel workbooks accumulate + * too many style definitions, causing performance problems or file corruption.

+ * + *

Usage:

+ *
+ *   XSSFWorkbook workbook = new XSSFWorkbook(file);
+ *   XSSFOptimiser.optimiseFonts(workbook);
+ *   XSSFOptimiser.optimiseCellStyles(workbook);
+ *   workbook.write(outputStream);
+ * 
+ * + *

Note: Unlike HSSFOptimiser which can remove unused styles from the binary + * format, this implementation remaps cells to canonical styles without removing + * entries from the XML. This is because XSSF's StylesTable maintains a separate + * Java list from the CTStylesheet XML, and removing entries can cause + * XmlValueDisconnectedException on save. The remapping approach prevents + * further style explosion and is safe.

+ * + * @see org.apache.poi.hssf.usermodel.HSSFOptimiser + * @since 6.0.0 + */ +public final class XSSFOptimiser { + + private XSSFOptimiser() { + // no instances of this class + } + + /** + * Goes through the Workbook, optimising the cell styles by remapping + * cells that use duplicate styles to use the canonical (first) occurrence. + * + *

Two styles are considered duplicates if they have identical properties: + * font properties (not just font index), data format, alignment, borders, + * fill colors, etc.

+ * + *

For best results, call {@link #optimiseFonts(XSSFWorkbook)} first.

+ * + * @param workbook The workbook to optimise + * @return The number of cells that were remapped to canonical styles + */ + public static int optimiseCellStyles(XSSFWorkbook workbook) { + int numStyles = workbook.getNumCellStyles(); + + if (numStyles <= 1) { + return 0; // Nothing to optimize + } + + // Step 1: Build signature map for duplicate detection + Map signatureToCanonical = new LinkedHashMap<>(); + Map oldToCanonical = new HashMap<>(); + + for (int i = 0; i < numStyles; i++) { + XSSFCellStyle style = workbook.getCellStyleAt(i); + String signature = getStyleSignature(style); + + if (signatureToCanonical.containsKey(signature)) { + // Duplicate found - map to earlier canonical style + oldToCanonical.put(i, signatureToCanonical.get(signature)); + } else { + // First occurrence - this is the canonical style + signatureToCanonical.put(signature, i); + oldToCanonical.put(i, i); + } + } + + // Step 2: Remap all cells to use canonical styles + int cellsRemapped = 0; + for (int sheetIdx = 0; sheetIdx < workbook.getNumberOfSheets(); sheetIdx++) { + XSSFSheet sheet = workbook.getSheetAt(sheetIdx); + for (Row row : sheet) { + if (row == null) continue; + for (Cell cell : row) { + if (cell == null) continue; + int currentIdx = cell.getCellStyle().getIndex(); + int canonicalIdx = oldToCanonical.getOrDefault(currentIdx, currentIdx); + + if (canonicalIdx != currentIdx) { + cell.setCellStyle(workbook.getCellStyleAt(canonicalIdx)); + cellsRemapped++; + } + } + } + } + + return cellsRemapped; + } + + /** + * Goes through the Workbook, optimising the fonts by updating cell styles + * that reference duplicate fonts to use the canonical (first) occurrence. + * + *

Two fonts are considered duplicates if they have identical properties: + * name, size, bold, italic, strikeout, underline, color, typeOffset, charset.

+ * + * @param workbook The workbook to optimise + * @return The number of duplicate fonts found + */ + public static int optimiseFonts(XSSFWorkbook workbook) { + int numFonts = workbook.getNumberOfFonts(); + + if (numFonts <= 1) { + return 0; // Nothing to optimize + } + + // Step 1: Build signature map for duplicate detection + Map signatureToCanonical = new LinkedHashMap<>(); + Map oldToCanonical = new HashMap<>(); + int duplicatesFound = 0; + + for (int i = 0; i < numFonts; i++) { + XSSFFont font = workbook.getFontAt(i); + String signature = getFontSignature(font); + + if (signatureToCanonical.containsKey(signature)) { + oldToCanonical.put(i, signatureToCanonical.get(signature)); + duplicatesFound++; + } else { + signatureToCanonical.put(signature, i); + oldToCanonical.put(i, i); + } + } + + if (duplicatesFound == 0) { + return 0; // No duplicates to remap + } + + // Step 2: Update font references in cell styles' CTXf entries + StylesTable stylesTable = workbook.getStylesSource(); + CTStylesheet ctStylesheet = stylesTable.getCTStylesheet(); + CTCellXfs ctXfs = ctStylesheet.getCellXfs(); + + if (ctXfs != null) { + for (int i = 0; i < ctXfs.sizeOfXfArray(); i++) { + CTXf xf = ctXfs.getXfArray(i); + if (xf.isSetFontId()) { + int oldFontId = (int) xf.getFontId(); + int canonicalFontId = oldToCanonical.getOrDefault(oldFontId, oldFontId); + if (canonicalFontId != oldFontId) { + xf.setFontId(canonicalFontId); + } + } + } + } + + return duplicatesFound; + } + + /** + * Generate a signature string for a cell style to detect duplicates. + * Uses font PROPERTIES (not index) so styles with identical but separate fonts + * are detected as duplicates. + */ + private static String getStyleSignature(XSSFCellStyle style) { + StringBuilder sb = new StringBuilder(); + // Use font properties, not index, so identical fonts are detected + XSSFFont font = style.getFont(); + sb.append("font:").append(getFontSignature(font)).append(";"); + sb.append("fmt:").append(style.getDataFormat()).append(";"); + sb.append("align:").append(style.getAlignment()).append(";"); + sb.append("valign:").append(style.getVerticalAlignment()).append(";"); + sb.append("wrap:").append(style.getWrapText()).append(";"); + sb.append("rotation:").append(style.getRotation()).append(";"); + sb.append("indent:").append(style.getIndention()).append(";"); + sb.append("borderL:").append(style.getBorderLeft()).append(";"); + sb.append("borderR:").append(style.getBorderRight()).append(";"); + sb.append("borderT:").append(style.getBorderTop()).append(";"); + sb.append("borderB:").append(style.getBorderBottom()).append(";"); + sb.append("fillFg:").append(colorToString(style.getFillForegroundColorColor())).append(";"); + sb.append("fillBg:").append(colorToString(style.getFillBackgroundColorColor())).append(";"); + sb.append("fillPat:").append(style.getFillPattern()).append(";"); + sb.append("locked:").append(style.getLocked()).append(";"); + sb.append("hidden:").append(style.getHidden()).append(";"); + return sb.toString(); + } + + /** + * Generate a signature string for a font to detect duplicates. + */ + private static String getFontSignature(XSSFFont font) { + StringBuilder sb = new StringBuilder(); + sb.append("name:").append(font.getFontName()).append(";"); + sb.append("size:").append(font.getFontHeightInPoints()).append(";"); + sb.append("bold:").append(font.getBold()).append(";"); + sb.append("italic:").append(font.getItalic()).append(";"); + sb.append("strike:").append(font.getStrikeout()).append(";"); + sb.append("underline:").append(font.getUnderline()).append(";"); + sb.append("color:").append(colorToString(font.getXSSFColor())).append(";"); + sb.append("typeOffset:").append(font.getTypeOffset()).append(";"); + sb.append("charset:").append(font.getCharSet()).append(";"); + return sb.toString(); + } + + /** + * Convert a color to a string for comparison. + */ + private static String colorToString(Color color) { + if (color == null) { + return "null"; + } + if (color instanceof XSSFColor) { + XSSFColor xssfColor = (XSSFColor) color; + byte[] rgb = xssfColor.getRGB(); + if (rgb != null) { + return String.format(Locale.ROOT, "#%02X%02X%02X", rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF); + } + return "indexed:" + xssfColor.getIndexed(); + } + return color.toString(); + } +} diff --git a/poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFOptimiser.java b/poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFOptimiser.java new file mode 100755 index 0000000000..8587cffbe3 --- /dev/null +++ b/poi-ooxml/src/test/java/org/apache/poi/xssf/usermodel/TestXSSFOptimiser.java @@ -0,0 +1,305 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.usermodel; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellStyle; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.DataFormat; +import org.apache.poi.ss.usermodel.Font; +import org.apache.poi.ss.usermodel.HorizontalAlignment; +import org.apache.poi.ss.usermodel.Row; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link XSSFOptimiser}. + */ +class TestXSSFOptimiser { + + /** + * Test that optimiseCellStyles remaps cells with duplicate styles to canonical style. + */ + @Test + void testOptimiseCellStylesRemapsDuplicates() throws Exception { + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + XSSFSheet sheet = workbook.createSheet("Test"); + + // Create 10 cells, each with a "new" style that is actually identical + for (int i = 0; i < 10; i++) { + Row row = sheet.createRow(i); + Cell cell = row.createCell(0); + cell.setCellValue("Value " + i); + + // Create a "new" style for each cell (causes style explosion) + CellStyle style = workbook.createCellStyle(); + Font font = workbook.createFont(); + font.setFontName("Calibri"); + font.setFontHeightInPoints((short) 11); + font.setBold(false); + style.setFont(font); + cell.setCellStyle(style); + } + + int stylesBeforeOptimize = workbook.getNumCellStyles(); + assertTrue(stylesBeforeOptimize > 5, + "Should have created multiple styles before optimization, got: " + stylesBeforeOptimize); + + // Optimize - returns number of cells remapped + int cellsRemapped = XSSFOptimiser.optimiseCellStyles(workbook); + assertTrue(cellsRemapped > 0, + "Should have remapped cells to canonical styles, got: " + cellsRemapped); + + // Verify all cells now reference the same canonical style index + int canonicalStyleIdx = sheet.getRow(0).getCell(0).getCellStyle().getIndex(); + for (int i = 1; i < 10; i++) { + Cell cell = sheet.getRow(i).getCell(0); + assertEquals(canonicalStyleIdx, cell.getCellStyle().getIndex(), + "Cell " + i + " should use canonical style index"); + } + + // Verify cells still have correct values + for (int i = 0; i < 10; i++) { + Cell cell = sheet.getRow(i).getCell(0); + assertEquals("Value " + i, cell.getStringCellValue()); + } + } + } + + /** + * Test that optimiseCellStyles handles unused styles gracefully. + */ + @Test + void testOptimiseCellStylesHandlesUnusedStyles() throws Exception { + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + XSSFSheet sheet = workbook.createSheet("Test"); + + // Create a cell with one style + Row row = sheet.createRow(0); + Cell cell = row.createCell(0); + cell.setCellValue("Used style"); + CellStyle usedStyle = workbook.createCellStyle(); + usedStyle.setAlignment(HorizontalAlignment.CENTER); + cell.setCellStyle(usedStyle); + + // Create several unused styles + for (int i = 0; i < 5; i++) { + CellStyle unusedStyle = workbook.createCellStyle(); + unusedStyle.setAlignment(HorizontalAlignment.LEFT); + // Never apply this style to any cell + } + + int stylesBeforeOptimize = workbook.getNumCellStyles(); + assertTrue(stylesBeforeOptimize >= 6, + "Should have at least 6 styles (1 default + 1 used + 5 unused), got: " + stylesBeforeOptimize); + + // Optimize - should not throw, returns 0 (no duplicates to remap) + int cellsRemapped = XSSFOptimiser.optimiseCellStyles(workbook); + assertEquals(0, cellsRemapped, "No duplicate styles to remap"); + + // Verify the used cell still has correct alignment + CellStyle cellStyle = cell.getCellStyle(); + assertEquals(HorizontalAlignment.CENTER, cellStyle.getAlignment(), + "Cell should still have CENTER alignment after optimization"); + } + } + + /** + * Test that optimiseCellStyles preserves different styles. + */ + @Test + void testOptimiseCellStylesPreservesDifferentStyles() throws Exception { + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + XSSFSheet sheet = workbook.createSheet("Test"); + + // Cell with bold style + Row row1 = sheet.createRow(0); + Cell cell1 = row1.createCell(0); + cell1.setCellValue("Bold"); + CellStyle boldStyle = workbook.createCellStyle(); + Font boldFont = workbook.createFont(); + boldFont.setBold(true); + boldStyle.setFont(boldFont); + cell1.setCellStyle(boldStyle); + + // Cell with italic style + Row row2 = sheet.createRow(1); + Cell cell2 = row2.createCell(0); + cell2.setCellValue("Italic"); + CellStyle italicStyle = workbook.createCellStyle(); + Font italicFont = workbook.createFont(); + italicFont.setItalic(true); + italicStyle.setFont(italicFont); + cell2.setCellStyle(italicStyle); + + // Cell with number format + Row row3 = sheet.createRow(2); + Cell cell3 = row3.createCell(0); + cell3.setCellValue(1234.56); + CellStyle numStyle = workbook.createCellStyle(); + DataFormat format = workbook.createDataFormat(); + numStyle.setDataFormat(format.getFormat("#,##0.00")); + cell3.setCellStyle(numStyle); + + // Optimize + XSSFOptimiser.optimiseCellStyles(workbook); + + // Verify each cell retains its distinct formatting + Font font1 = workbook.getFontAt(cell1.getCellStyle().getFontIndex()); + assertTrue(font1.getBold(), "Cell 1 should still be bold"); + + Font font2 = workbook.getFontAt(cell2.getCellStyle().getFontIndex()); + assertTrue(font2.getItalic(), "Cell 2 should still be italic"); + + String numFormat = cell3.getCellStyle().getDataFormatString(); + assertTrue(numFormat.contains("#,##0"), + "Cell 3 should still have number format, got: " + numFormat); + } + } + + /** + * Test that optimiseFonts remaps style font references to canonical fonts. + */ + @Test + void testOptimiseFontsRemapsDuplicates() throws Exception { + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + XSSFSheet sheet = workbook.createSheet("Test"); + + // Create cells with duplicate fonts + for (int i = 0; i < 10; i++) { + Row row = sheet.createRow(i); + Cell cell = row.createCell(0); + cell.setCellValue("Value " + i); + + // Create a "new" font for each cell (identical fonts) + CellStyle style = workbook.createCellStyle(); + Font font = workbook.createFont(); + font.setFontName("Arial"); + font.setFontHeightInPoints((short) 12); + font.setBold(true); + style.setFont(font); + cell.setCellStyle(style); + } + + int fontsBeforeOptimize = workbook.getNumberOfFonts(); + assertTrue(fontsBeforeOptimize > 5, + "Should have created multiple fonts before optimization, got: " + fontsBeforeOptimize); + + // Optimize fonts - returns number of duplicates found + int duplicatesFound = XSSFOptimiser.optimiseFonts(workbook); + assertTrue(duplicatesFound > 0, + "Should have found duplicate fonts, got: " + duplicatesFound); + + // Verify cells still have bold Arial font (formatting preserved) + for (int i = 0; i < 10; i++) { + Cell cell = sheet.getRow(i).getCell(0); + Font font = workbook.getFontAt(cell.getCellStyle().getFontIndex()); + assertEquals("Arial", font.getFontName(), "Font name should be preserved"); + assertTrue(font.getBold(), "Font should still be bold"); + } + } + } + + /** + * Test that workbook is saveable after optimization. + */ + @Test + void testWorkbookSaveableAfterOptimization() throws Exception { + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + XSSFSheet sheet = workbook.createSheet("Test"); + + // Create style explosion + for (int i = 0; i < 20; i++) { + Row row = sheet.createRow(i); + Cell cell = row.createCell(0); + cell.setCellValue("Value " + i); + + CellStyle style = workbook.createCellStyle(); + Font font = workbook.createFont(); + font.setFontName("Calibri"); + style.setFont(font); + cell.setCellStyle(style); + } + + // Optimize + XSSFOptimiser.optimiseCellStyles(workbook); + XSSFOptimiser.optimiseFonts(workbook); + + // Save to byte array (should not throw XmlValueDisconnectedException) + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + workbook.write(baos); + byte[] savedBytes = baos.toByteArray(); + + assertTrue(savedBytes.length > 0, "Saved workbook should have content"); + + // Verify we can read it back + try (XSSFWorkbook reloaded = new XSSFWorkbook(new ByteArrayInputStream(savedBytes))) { + XSSFSheet reloadedSheet = reloaded.getSheetAt(0); + assertEquals("Value 0", reloadedSheet.getRow(0).getCell(0).getStringCellValue()); + assertEquals("Value 19", reloadedSheet.getRow(19).getCell(0).getStringCellValue()); + } + } + } + + /** + * Test that optimization preserves formulas. + */ + @Test + void testOptimizationPreservesFormulas() throws Exception { + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + XSSFSheet sheet = workbook.createSheet("Test"); + + // Create cells with values + for (int i = 0; i < 5; i++) { + Row row = sheet.createRow(i); + Cell cell = row.createCell(0); + cell.setCellValue(i + 1); + + CellStyle style = workbook.createCellStyle(); + style.setAlignment(HorizontalAlignment.RIGHT); + cell.setCellStyle(style); + } + + // Create formula cell + Row sumRow = sheet.createRow(5); + Cell sumCell = sumRow.createCell(0); + sumCell.setCellFormula("SUM(A1:A5)"); + CellStyle sumStyle = workbook.createCellStyle(); + Font boldFont = workbook.createFont(); + boldFont.setBold(true); + sumStyle.setFont(boldFont); + sumCell.setCellStyle(sumStyle); + + // Optimize + XSSFOptimiser.optimiseCellStyles(workbook); + + // Verify formula is preserved + Cell formulaCell = sheet.getRow(5).getCell(0); + assertEquals(CellType.FORMULA, formulaCell.getCellType()); + assertEquals("SUM(A1:A5)", formulaCell.getCellFormula()); + + // Verify bold style is preserved on formula cell + Font font = workbook.getFontAt(formulaCell.getCellStyle().getFontIndex()); + assertTrue(font.getBold(), "Formula cell should still be bold"); + } + } +}