Optimize handling of large XSSFTables

The current implementation calls updateHeaders() very often,
which makes cloning a sheet take very long and use a lot of CPU.

We can optimize a number of things here:
* Use getTableColumnArray(), as iterating the Lists returned by the
List-based methods has very costly implementations of hasNext()/next()
* Avoid some duplicated calls to updateHeaders()

More aggressive optimizations are likely possible, such as
only calling updateHeaders() once after cloning,
but these would require more invasive changes in this
rarely used area of the code.
This commit is contained in:
Dominik Stadler 2026-01-11 18:10:46 +01:00
parent 1fc4f9e3b7
commit d93a29ae67
2 changed files with 33 additions and 8 deletions

View File

@ -211,7 +211,9 @@ public class XSSFTable extends POIXMLDocumentPart implements Table {
List<XSSFTableColumn> columns = new ArrayList<>();
CTTableColumns ctTableColumns = ctTable.getTableColumns();
if (ctTableColumns != null) {
for (CTTableColumn column : ctTableColumns.getTableColumnList()) {
// Use Array and not List-based access, as list-iteration is
// very slow for large tables
for (CTTableColumn column : ctTableColumns.getTableColumnArray()) {
XSSFTableColumn tableColumn = new XSSFTableColumn(this, column);
columns.add(tableColumn);
}
@ -308,11 +310,12 @@ public class XSSFTable extends POIXMLDocumentPart implements Table {
tableStart.getCol() + newColumnCount - 1);
AreaReference newTableArea = new AreaReference(tableStart, newTableEnd, version);
// setCellRef also calls updateHeaders()
setCellRef(newTableArea);
} else {
updateHeaders();
}
updateHeaders();
return getColumns().get(columnIndex);
}
@ -353,7 +356,9 @@ public class XSSFTable extends POIXMLDocumentPart implements Table {
CTTableColumns tableColumns = ctTable.getTableColumns();
tableColumns.removeTableColumn(columnIndex);
tableColumns.setCount(tableColumns.getTableColumnList().size());
// Use Array and not List-based access, as list-iteration is
// very slow for large tables
tableColumns.setCount(tableColumns.getTableColumnArray().length);
updateReferences();
updateHeaders();
}
@ -406,7 +411,7 @@ public class XSSFTable extends POIXMLDocumentPart implements Table {
try {
ctTable.getTableStyleInfo().unsetName();
} catch (Exception e) {
LOG.atDebug().log("Failed to unset style name", e);
LOG.atDebug().log("Failed to unset style name: {}", e);
}
}
styleName = null;
@ -821,13 +826,19 @@ public class XSSFTable extends POIXMLDocumentPart implements Table {
int cellnum = firstHeaderColumn;
CTTableColumns ctTableColumns = getCTTable().getTableColumns();
if(ctTableColumns != null) {
for (CTTableColumn col : ctTableColumns.getTableColumnList()) {
// Use Array and not List-based access, as list-iteration is
// very slow for large tables
for (CTTableColumn col : ctTableColumns.getTableColumnArray()) {
XSSFCell cell = row.getCell(cellnum);
if (cell != null) {
String colName = formatter.formatCellValue(cell);
colName = colName.replace("\n", "_x000a_");
colName = colName.replace("\r", "_x000d_");
col.setName(colName);
// setName() is costly, let's only run it when necessary
if (!colName.equals(col.getName())) {
col.setName(colName);
}
}
cellnum++;
}

View File

@ -39,7 +39,6 @@ import org.junit.jupiter.api.Test;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTable;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableStyleInfo;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
import static org.junit.jupiter.api.Assertions.*;
@ -784,4 +783,19 @@ public final class TestXSSFTable {
}
}
}
/**
 * Checks that {@code getStyleName()} returns {@code null} for a table
 * directly after creation, and still returns {@code null} once a newly
 * instantiated {@code CTTableStyleInfo} has been set on the underlying
 * CTTable.
 */
@Test
void testGetStyleNameNull() throws IOException {
    try (XSSFWorkbook workbook = new XSSFWorkbook()) {
        XSSFSheet sheet = workbook.createSheet();
        XSSFTable table = sheet.createTable(
                new AreaReference("B2:B6", workbook.getSpreadsheetVersion()));

        // Straight after creation, before the test touches the style info
        assertNull(table.getStyleName());

        // Attach a newly instantiated style info on which no name was set
        CTTableStyleInfo blankStyleInfo = CTTableStyleInfo.Factory.newInstance();
        table.getCTTable().setTableStyleInfo(blankStyleInfo);
        assertNull(table.getStyleName());
    }
}
}