Speed up execution of formulas with whole-column area reference

The previous change via r1894675 did not fully solve this as
it only took place when more than one sheet were used.

There were still some cases where evaluating formulas needed
to iterate across a huge number of non-existant rows, e.g.
TestVLookup, which took more than 1 minute locally.

However doing the lowering of row-numbers always would trigger
test-regressions as some Excel-functionality depends on actions
being possible on rows larger than current "last row" in the sheet.

Thus changed this to a slightly different approach which only adjusts
last-row when it is at or above the workbook-limit of rows

Execution of TestVLookup is now at 1-2 seconds!

Also added a test-case to verify details of the implementation now.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1899533 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2022-04-03 13:25:21 +00:00
parent 86bc671633
commit d80f48dd82
3 changed files with 177 additions and 5 deletions

View File

@ -38,6 +38,7 @@ final class SheetRangeEvaluator implements SheetRange {
_lastSheetIndex = lastSheetIndex;
_sheetEvaluators = sheetEvaluators.clone();
}
public SheetRangeEvaluator(int onlySheetIndex, SheetRefEvaluator sheetEvaluator) {
this(onlySheetIndex, onlySheetIndex, new SheetRefEvaluator[] {sheetEvaluator});
}
@ -53,6 +54,7 @@ final class SheetRangeEvaluator implements SheetRange {
public int getFirstSheetIndex() {
return _firstSheetIndex;
}
public int getLastSheetIndex() {
return _lastSheetIndex;
}
@ -60,6 +62,7 @@ final class SheetRangeEvaluator implements SheetRange {
public String getSheetName(int sheetIndex) {
return getSheetEvaluator(sheetIndex).getSheetName();
}
public String getSheetNameRange() {
StringBuilder sb = new StringBuilder();
sb.append(getSheetName(_firstSheetIndex));
@ -86,13 +89,31 @@ final class SheetRangeEvaluator implements SheetRange {
* Otherwise, the highest used row number across all sheets is returned.
*/
public int adjustRowNumber(int rowIndex) {
int maxRowNum = rowIndex;
for (int i = _firstSheetIndex; i < _lastSheetIndex; i++) {
maxRowNum = Math.max(maxRowNum, _sheetEvaluators[i].getLastRowNum());
// some code is depending on the row-numbers higher than the "last row of the sheet"
// so we can only apply this optimisation if the given rowIndex is at or above the maximum
// possible number of row-indices for the current SpreadSheetVersion
boolean found = false;
for (int i = _firstSheetIndex; i <= _lastSheetIndex; i++) {
if (rowIndex >= _sheetEvaluators[i - _firstSheetIndex].getMaxRowNum()) {
found = true;
}
}
// do not try to evaluate further than there are rows in any sheet
// rowIndex is not large enough, so no change necessary
if (!found) {
return rowIndex;
}
// reset maxRowNum if it is at the limit, i.e. we did choose "all rows" somewhere
// This happens e.g. for whole-column-references like "A:B"
// find the highest row-number across all relevant sheets
int maxRowNum = 0;
for (int i = _firstSheetIndex; i <= _lastSheetIndex; i++) {
maxRowNum = Math.max(maxRowNum, _sheetEvaluators[i - _firstSheetIndex].getLastRowNum());
}
// now use the new maxRowNum if it was actually lowered
return Math.min(rowIndex, maxRowNum);
}
}

View File

@ -96,4 +96,12 @@ final class SheetRefEvaluator {
public int getLastRowNum() {
return getSheet().getLastRowNum();
}
/**
* @return The maximum row number that is possible for the current
* Spreadsheet version, see {@link org.apache.poi.ss.SpreadsheetVersion#getLastRowIndex()}
*/
public int getMaxRowNum() {
return _bookEvaluator.getWorkbook().getSpreadsheetVersion().getLastRowIndex();
}
}

View File

@ -0,0 +1,143 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.ss.formula;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.apache.poi.hssf.usermodel.HSSFEvaluationWorkbook;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.formula.eval.BlankEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.apache.poi.ss.usermodel.Sheet;
import org.junit.jupiter.api.Test;
class SheetRangeEvaluatorTest {
@Test
public void testConstruct() {
SheetRangeEvaluator eval = new SheetRangeEvaluator(0, 1, new SheetRefEvaluator[0]);
assertEquals(0, eval.getFirstSheetIndex());
assertEquals(1, eval.getLastSheetIndex());
}
@Test
public void testConstruct2() {
SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(null, null, 0));
assertEquals(1, eval.getFirstSheetIndex());
assertEquals(1, eval.getLastSheetIndex());
assertNotNull(eval.getSheetEvaluator(1));
}
@Test
public void testGetSheetName() {
HSSFWorkbook wb = new HSSFWorkbook();
wb.createSheet("abc");
wb.createSheet("def");
WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb),
IStabilityClassifier.TOTALLY_IMMUTABLE, null);
SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(book, null, 1));
assertEquals("def", eval.getSheetName(1));
assertEquals("def", eval.getSheetNameRange());
}
@Test
public void testGetSheetNameRange() {
SheetRangeEvaluator eval = createWithTwoSheets();
assertEquals("abc", eval.getSheetName(0));
assertEquals("def", eval.getSheetName(1));
assertEquals("abc:def", eval.getSheetNameRange());
}
private SheetRangeEvaluator createWithTwoSheets() {
HSSFWorkbook wb = new HSSFWorkbook();
wb.createSheet("abc");
wb.createSheet("def");
WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb),
IStabilityClassifier.TOTALLY_IMMUTABLE, null);
return new SheetRangeEvaluator(0, 1,
new SheetRefEvaluator[] {
new SheetRefEvaluator(book, null, 0),
new SheetRefEvaluator(book, null, 1)
});
}
@SuppressWarnings("ResultOfMethodCallIgnored")
@Test
public void testInvalidInput() {
assertThrows(IllegalArgumentException.class, () -> new SheetRangeEvaluator(-1, 0, null));
assertThrows(IllegalArgumentException.class, () -> new SheetRangeEvaluator(1, 0, null));
SheetRangeEvaluator eval = new SheetRangeEvaluator(1, new SheetRefEvaluator(null, null, 0));
assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(-1));
assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(0));
assertThrows(IllegalArgumentException.class, () -> eval.getSheetEvaluator(2));
}
@Test
public void testGetEval() {
SheetRangeEvaluator eval = createWithTwoSheets();
ValueEval valueEval = eval.getEvalForCell(1, 0, 0);
assertTrue(valueEval instanceof BlankEval,
"Had: " + valueEval);
}
@Test
public void testAdjustRowNumber() {
HSSFWorkbook wb = new HSSFWorkbook();
Sheet sheet = wb.createSheet("abc");
sheet.createRow(0);
sheet.createRow(10);
WorkbookEvaluator book = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb),
IStabilityClassifier.TOTALLY_IMMUTABLE, null);
SheetRangeEvaluator eval = new SheetRangeEvaluator(0, new SheetRefEvaluator(book, null, 0));
assertEquals(0, eval.adjustRowNumber(0),
"Should stay if input is lower than max row");
assertEquals(1, eval.adjustRowNumber(1),
"Should stay if input is lower than max row");
assertEquals(8, eval.adjustRowNumber(8),
"Should stay if input is lower than max row");
assertEquals(9, eval.adjustRowNumber(9),
"Should stay if input is lower than max row");
assertEquals(10, eval.adjustRowNumber(10),
"Should stay if input is equal than max row");
assertEquals(11, eval.adjustRowNumber(11),
"Should be adjusted to 9 as this is the max row-index");
assertEquals(10, eval.adjustRowNumber(Integer.MAX_VALUE),
"Should be adjusted to 9 as this is the max row-index");
assertEquals(10, eval.adjustRowNumber(SpreadsheetVersion.EXCEL97.getLastRowIndex()),
"Should be adjusted to 9 as this is the max row-index");
}
@Test
public void testAdjustRowNumberEmpty() {
SheetRangeEvaluator eval = createWithTwoSheets();
assertEquals(0, eval.adjustRowNumber(0),
"Should stay at zero as there is no row in workbook");
assertEquals(1, eval.adjustRowNumber(1),
"Should be adjusted to zero as there is no row in workbook");
assertEquals(0, eval.adjustRowNumber(SpreadsheetVersion.EXCEL97.getLastRowIndex()),
"Should be adjusted to 9 as this is the max row-index");
assertEquals(0, eval.adjustRowNumber(Integer.MAX_VALUE),
"Should be adjusted to zero as there is no row in workbook");
}
}