[github-137] solves/unifies blank/missing value colection for PRODUCT/MDETERM/GEOMEAN. Thanks to gallonfizik. This closes #137

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1849237 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2018-12-18 21:16:25 +00:00
parent 1a25ff4dce
commit 2dea9797ce
6 changed files with 304 additions and 126 deletions

View File

@ -218,11 +218,7 @@ public abstract class AggregateFunction extends MultiOperandNumericFunction {
public static final Function PERCENTILE = new Percentile();
public static final Function PRODUCT = new AggregateFunction() {
protected double evaluate(double[] values) {
return MathX.product(values);
}
};
public static final Function PRODUCT = new Product();
public static final Function SMALL = new LargeSmall(false);
public static final Function STDEV = new AggregateFunction() {
protected double evaluate(double[] values) throws EvaluationException {
@ -258,7 +254,24 @@ public abstract class AggregateFunction extends MultiOperandNumericFunction {
return StatsLib.varp(values);
}
};
public static final Function GEOMEAN = new AggregateFunction() {
public static final Function GEOMEAN = new Geomean();
private static class Product extends AggregateFunction {
Product() {
setMissingArgPolicy(Policy.SKIP);
}
@Override
protected double evaluate(double[] values) throws EvaluationException {
return MathX.product(values);
}
}
private static class Geomean extends AggregateFunction {
Geomean() {
setMissingArgPolicy(Policy.COERCE);
}
@Override
protected double evaluate(double[] values) throws EvaluationException {
// The library implementation returns 0 for an input sequence like [1, 0]. So this check is necessary.
@ -269,5 +282,5 @@ public abstract class AggregateFunction extends MultiOperandNumericFunction {
}
return new GeometricMean().evaluate(values, 0, values.length);
}
};
}
}

View File

@ -284,16 +284,23 @@ public abstract class MatrixFunction implements Function{
}
};
public static final Function MDETERM = new OneArrayArg() {
private final MutableValueCollector instance = new MutableValueCollector(false, false);
public static final Function MDETERM = new Mdeterm();
private static class Mdeterm extends OneArrayArg {
private final MutableValueCollector instance;
public Mdeterm() {
instance = new MutableValueCollector(false, false);
instance.setBlankEvalPolicy(MultiOperandNumericFunction.Policy.ERROR);
}
protected double[] collectValues(ValueEval arg) throws EvaluationException {
double[] values = instance.collectValues(arg);
/* handle case where MDETERM is operating on an array that that is not completely filled*/
if (arg instanceof AreaEval && values.length == 1)
throw new EvaluationException(ErrorEval.VALUE_INVALID);
return instance.collectValues(arg);
}
@ -307,7 +314,7 @@ public abstract class MatrixFunction implements Function{
result[0][0] = (new LUDecomposition(temp)).getDeterminant();
return result;
}
};
}
public static final Function MMULT = new TwoArrayArg() {
private final MutableValueCollector instance = new MutableValueCollector(false, false);

View File

@ -38,13 +38,21 @@ import org.apache.poi.ss.formula.eval.ValueEval;
* where the order of operands does not matter
*/
public abstract class MultiOperandNumericFunction implements Function {
public enum Policy {COERCE, SKIP, ERROR}
private final boolean _isReferenceBoolCounted;
private final boolean _isBlankCounted;
private interface EvalConsumer<T, R> {
void accept(T value, R receiver) throws EvaluationException;
}
private EvalConsumer<BoolEval, DoubleList> boolByRefConsumer;
private EvalConsumer<BoolEval, DoubleList> boolByValueConsumer;
private EvalConsumer<BlankEval, DoubleList> blankConsumer;
private EvalConsumer<MissingArgEval, DoubleList> missingArgConsumer = ConsumerFactory.createForMissingArg(Policy.SKIP);
protected MultiOperandNumericFunction(boolean isReferenceBoolCounted, boolean isBlankCounted) {
_isReferenceBoolCounted = isReferenceBoolCounted;
_isBlankCounted = isBlankCounted;
boolByRefConsumer = ConsumerFactory.createForBoolEval(isReferenceBoolCounted ? Policy.COERCE : Policy.SKIP);
boolByValueConsumer = ConsumerFactory.createForBoolEval(Policy.COERCE);
blankConsumer = ConsumerFactory.createForBlank(isBlankCounted ? Policy.COERCE : Policy.SKIP);
}
static final double[] EMPTY_DOUBLE_ARRAY = {};
@ -85,6 +93,14 @@ public abstract class MultiOperandNumericFunction implements Function {
private static final int DEFAULT_MAX_NUM_OPERANDS = SpreadsheetVersion.EXCEL2007.getMaxFunctionArgs();
public void setMissingArgPolicy(Policy policy) {
missingArgConsumer = ConsumerFactory.createForMissingArg(policy);
}
public void setBlankEvalPolicy(Policy policy) {
blankConsumer = ConsumerFactory.createForBlank(policy);
}
public final ValueEval evaluate(ValueEval[] args, int srcCellRow, int srcCellCol) {
try {
double[] values = getNumberArray(args);
@ -183,9 +199,11 @@ public abstract class MultiOperandNumericFunction implements Function {
throw new IllegalArgumentException("ve must not be null");
}
if (ve instanceof BoolEval) {
if (!isViaReference || _isReferenceBoolCounted) {
BoolEval boolEval = (BoolEval) ve;
temp.add(boolEval.getNumberValue());
BoolEval boolEval = (BoolEval) ve;
if (isViaReference) {
boolByRefConsumer.accept(boolEval, temp);
} else {
boolByValueConsumer.accept(boolEval, temp);
}
return;
}
@ -211,16 +229,58 @@ public abstract class MultiOperandNumericFunction implements Function {
throw new EvaluationException((ErrorEval) ve);
}
if (ve == BlankEval.instance) {
if (_isBlankCounted) {
temp.add(0.0);
}
blankConsumer.accept((BlankEval) ve, temp);
return;
}
if (ve == MissingArgEval.instance) {
temp.add(0.0);
missingArgConsumer.accept((MissingArgEval) ve, temp);
return;
}
throw new RuntimeException("Invalid ValueEval type passed for conversion: ("
+ ve.getClass() + ")");
}
private static class ConsumerFactory {
static EvalConsumer<MissingArgEval, DoubleList> createForMissingArg(Policy policy) {
final EvalConsumer<MissingArgEval, DoubleList> coercer =
(MissingArgEval value, DoubleList receiver) -> receiver.add(0.0);
return createAny(coercer, policy);
}
static EvalConsumer<BoolEval, DoubleList> createForBoolEval(Policy policy) {
final EvalConsumer<BoolEval, DoubleList> coercer =
(BoolEval value, DoubleList receiver) -> receiver.add(value.getNumberValue());
return createAny(coercer, policy);
}
static EvalConsumer<BlankEval, DoubleList> createForBlank(Policy policy) {
final EvalConsumer<BlankEval, DoubleList> coercer =
(BlankEval value, DoubleList receiver) -> receiver.add(0.0);
return createAny(coercer, policy);
}
private static <T> EvalConsumer<T, DoubleList> createAny(EvalConsumer<T, DoubleList> coercer, Policy policy) {
switch (policy) {
case COERCE:
return coercer;
case SKIP:
return doNothing();
case ERROR:
return throwValueInvalid();
default:
throw new AssertionError();
}
}
private static <T> EvalConsumer<T, DoubleList> doNothing() {
return (T value, DoubleList receiver) -> {
};
}
private static <T> EvalConsumer<T, DoubleList> throwValueInvalid() {
return (T value, DoubleList receiver) -> {
throw new EvaluationException(ErrorEval.VALUE_INVALID);
};
}
}
}

View File

@ -20,10 +20,15 @@ package org.apache.poi.hssf.model;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
import org.apache.poi.ss.formula.eval.BlankEval;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.apache.poi.ss.formula.functions.EvalFactory;
import org.apache.poi.ss.formula.functions.MatrixFunction;
import org.apache.poi.ss.formula.ptg.AbstractFunctionPtg;
import org.apache.poi.ss.formula.ptg.FuncVarPtg;
import org.apache.poi.ss.formula.ptg.Ptg;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
* Tests specific formula examples in <tt>OperandClassTransformer</tt>.
@ -32,114 +37,127 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook;
*/
public final class TestOperandClassTransformer extends TestCase {
private static Ptg[] parseFormula(String formula) {
Ptg[] result = HSSFFormulaParser.parse(formula, null);
assertNotNull("Ptg array should not be null", result);
return result;
}
public void testMdeterm() {
String formula = "MDETERM(ABS(A1))";
Ptg[] ptgs = parseFormula(formula);
private static Ptg[] parseFormula(String formula) {
Ptg[] result = HSSFFormulaParser.parse(formula, null);
assertNotNull("Ptg array should not be null", result);
return result;
}
confirmTokenClass(ptgs, 0, Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 1, "ABS", Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 2, "MDETERM", Ptg.CLASS_VALUE);
}
public void testMdeterm() {
String formula = "MDETERM(ABS(A1))";
Ptg[] ptgs = parseFormula(formula);
/**
* In the example: <code>INDEX(PI(),1)</code>, Excel encodes PI() as 'array'. It is not clear
* what rule justifies this. POI currently encodes it as 'value' which Excel(2007) seems to
* tolerate. Changing the metadata for INDEX to have first parameter as 'array' class breaks
* other formulas involving INDEX. It seems like a special case needs to be made. Perhaps an
* important observation is that INDEX is one of very few functions that returns 'reference' type.
*
* This test has been added but disabled in order to document this issue.
*/
public void DISABLED_testIndexPi1() {
String formula = "INDEX(PI(),1)";
Ptg[] ptgs = parseFormula(formula);
confirmTokenClass(ptgs, 0, Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 1, "ABS", Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 2, "MDETERM", Ptg.CLASS_VALUE);
}
confirmFuncClass(ptgs, 1, "PI", Ptg.CLASS_ARRAY); // fails as of POI 3.1
confirmFuncClass(ptgs, 2, "INDEX", Ptg.CLASS_VALUE);
}
public void testMdetermReturnsValueInvalidOnABlankCell() {
ValueEval matrixRef = EvalFactory.createAreaEval("A1:B2",
new ValueEval[]{
BlankEval.instance,
new NumberEval(1),
new NumberEval(2),
new NumberEval(3)
}
);
ValueEval result = MatrixFunction.MDETERM.evaluate(new ValueEval[]{matrixRef} , 0, 0);
assertEquals(ErrorEval.VALUE_INVALID, result);
}
/**
* Even though count expects args of type R, because A1 is a direct operand of a
* value operator it must get type V
*/
public void testDirectOperandOfValueOperator() {
String formula = "COUNT(A1*1)";
Ptg[] ptgs = parseFormula(formula);
if (ptgs[0].getPtgClass() == Ptg.CLASS_REF) {
throw new AssertionFailedError("Identified bug 45348");
}
/**
* In the example: <code>INDEX(PI(),1)</code>, Excel encodes PI() as 'array'. It is not clear
* what rule justifies this. POI currently encodes it as 'value' which Excel(2007) seems to
* tolerate. Changing the metadata for INDEX to have first parameter as 'array' class breaks
* other formulas involving INDEX. It seems like a special case needs to be made. Perhaps an
* important observation is that INDEX is one of very few functions that returns 'reference' type.
* <p>
* This test has been added but disabled in order to document this issue.
*/
public void DISABLED_testIndexPi1() {
String formula = "INDEX(PI(),1)";
Ptg[] ptgs = parseFormula(formula);
confirmTokenClass(ptgs, 0, Ptg.CLASS_VALUE);
confirmTokenClass(ptgs, 3, Ptg.CLASS_VALUE);
}
/**
* A cell ref passed to a function expecting type V should be converted to type V
*/
public void testRtoV() {
confirmFuncClass(ptgs, 1, "PI", Ptg.CLASS_ARRAY); // fails as of POI 3.1
confirmFuncClass(ptgs, 2, "INDEX", Ptg.CLASS_VALUE);
}
String formula = "lookup(A1, A3:A52, B3:B52)";
Ptg[] ptgs = parseFormula(formula);
confirmTokenClass(ptgs, 0, Ptg.CLASS_VALUE);
}
public void testComplexIRR_bug45041() {
String formula = "(1+IRR(SUMIF(A:A,ROW(INDIRECT(MIN(A:A)&\":\"&MAX(A:A))),B:B),0))^365-1";
Ptg[] ptgs = parseFormula(formula);
/**
* Even though count expects args of type R, because A1 is a direct operand of a
* value operator it must get type V
*/
public void testDirectOperandOfValueOperator() {
String formula = "COUNT(A1*1)";
Ptg[] ptgs = parseFormula(formula);
if (ptgs[0].getPtgClass() == Ptg.CLASS_REF) {
throw new AssertionFailedError("Identified bug 45348");
}
FuncVarPtg rowFunc = (FuncVarPtg) ptgs[10];
FuncVarPtg sumifFunc = (FuncVarPtg) ptgs[12];
assertEquals("ROW", rowFunc.getName());
assertEquals("SUMIF", sumifFunc.getName());
confirmTokenClass(ptgs, 0, Ptg.CLASS_VALUE);
confirmTokenClass(ptgs, 3, Ptg.CLASS_VALUE);
}
if (rowFunc.getPtgClass() == Ptg.CLASS_VALUE || sumifFunc.getPtgClass() == Ptg.CLASS_VALUE) {
throw new AssertionFailedError("Identified bug 45041");
}
confirmTokenClass(ptgs, 1, Ptg.CLASS_REF);
confirmTokenClass(ptgs, 2, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 3, "MIN", Ptg.CLASS_VALUE);
confirmTokenClass(ptgs, 6, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 7, "MAX", Ptg.CLASS_VALUE);
confirmFuncClass(ptgs, 9, "INDIRECT", Ptg.CLASS_REF);
confirmFuncClass(ptgs, 10, "ROW", Ptg.CLASS_ARRAY);
confirmTokenClass(ptgs, 11, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 12, "SUMIF", Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 14, "IRR", Ptg.CLASS_VALUE);
}
/**
* A cell ref passed to a function expecting type V should be converted to type V
*/
public void testRtoV() {
private void confirmFuncClass(Ptg[] ptgs, int i, String expectedFunctionName, byte operandClass) {
confirmTokenClass(ptgs, i, operandClass);
AbstractFunctionPtg afp = (AbstractFunctionPtg) ptgs[i];
assertEquals(expectedFunctionName, afp.getName());
}
String formula = "lookup(A1, A3:A52, B3:B52)";
Ptg[] ptgs = parseFormula(formula);
confirmTokenClass(ptgs, 0, Ptg.CLASS_VALUE);
}
private void confirmTokenClass(Ptg[] ptgs, int i, byte operandClass) {
Ptg ptg = ptgs[i];
if (ptg.isBaseToken()) {
throw new AssertionFailedError("ptg[" + i + "] is a base token");
}
if (operandClass != ptg.getPtgClass()) {
throw new AssertionFailedError("Wrong operand class for ptg ("
+ ptg + "). Expected " + getOperandClassName(operandClass)
+ " but got " + getOperandClassName(ptg.getPtgClass()));
}
}
public void testComplexIRR_bug45041() {
String formula = "(1+IRR(SUMIF(A:A,ROW(INDIRECT(MIN(A:A)&\":\"&MAX(A:A))),B:B),0))^365-1";
Ptg[] ptgs = parseFormula(formula);
private static String getOperandClassName(byte ptgClass) {
switch (ptgClass) {
case Ptg.CLASS_REF:
return "R";
case Ptg.CLASS_VALUE:
return "V";
case Ptg.CLASS_ARRAY:
return "A";
}
throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
}
FuncVarPtg rowFunc = (FuncVarPtg) ptgs[10];
FuncVarPtg sumifFunc = (FuncVarPtg) ptgs[12];
assertEquals("ROW", rowFunc.getName());
assertEquals("SUMIF", sumifFunc.getName());
if (rowFunc.getPtgClass() == Ptg.CLASS_VALUE || sumifFunc.getPtgClass() == Ptg.CLASS_VALUE) {
throw new AssertionFailedError("Identified bug 45041");
}
confirmTokenClass(ptgs, 1, Ptg.CLASS_REF);
confirmTokenClass(ptgs, 2, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 3, "MIN", Ptg.CLASS_VALUE);
confirmTokenClass(ptgs, 6, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 7, "MAX", Ptg.CLASS_VALUE);
confirmFuncClass(ptgs, 9, "INDIRECT", Ptg.CLASS_REF);
confirmFuncClass(ptgs, 10, "ROW", Ptg.CLASS_ARRAY);
confirmTokenClass(ptgs, 11, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 12, "SUMIF", Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 14, "IRR", Ptg.CLASS_VALUE);
}
private void confirmFuncClass(Ptg[] ptgs, int i, String expectedFunctionName, byte operandClass) {
confirmTokenClass(ptgs, i, operandClass);
AbstractFunctionPtg afp = (AbstractFunctionPtg) ptgs[i];
assertEquals(expectedFunctionName, afp.getName());
}
private void confirmTokenClass(Ptg[] ptgs, int i, byte operandClass) {
Ptg ptg = ptgs[i];
if (ptg.isBaseToken()) {
throw new AssertionFailedError("ptg[" + i + "] is a base token");
}
if (operandClass != ptg.getPtgClass()) {
throw new AssertionFailedError("Wrong operand class for ptg ("
+ ptg + "). Expected " + getOperandClassName(operandClass)
+ " but got " + getOperandClassName(ptg.getPtgClass()));
}
}
private static String getOperandClassName(byte ptgClass) {
switch (ptgClass) {
case Ptg.CLASS_REF:
return "R";
case Ptg.CLASS_VALUE:
return "V";
case Ptg.CLASS_ARRAY:
return "A";
}
throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
}
}

View File

@ -42,16 +42,26 @@ public class TestMultiOperandNumericFunction {
}
@Test
public void missingArgEvalsAreCountedAsZero() {
MultiOperandNumericFunction instance = new Stub(true, true);
public void missingArgEvalsAreCountedAsZeroIfPolicyIsCoerce() {
MultiOperandNumericFunction instance = new Stub(true, true, MultiOperandNumericFunction.Policy.COERCE);
ValueEval result = instance.evaluate(new ValueEval[]{MissingArgEval.instance}, 0, 0);
assertTrue(result instanceof NumberEval);
assertEquals(0.0, ((NumberEval)result).getNumberValue(), 0);
}
@Test
public void missingArgEvalsAreSkippedIfZeroIfPolicyIsSkipped() {
MultiOperandNumericFunction instance = new Stub(true, true, MultiOperandNumericFunction.Policy.SKIP);
ValueEval result = instance.evaluate(new ValueEval[]{new NumberEval(1), MissingArgEval.instance}, 0, 0);
assertTrue(result instanceof NumberEval);
assertEquals(1.0, ((NumberEval)result).getNumberValue(), 0);
}
private static class Stub extends MultiOperandNumericFunction {
protected Stub(boolean isReferenceBoolCounted, boolean isBlankCounted) {
protected Stub(
boolean isReferenceBoolCounted, boolean isBlankCounted, MultiOperandNumericFunction.Policy missingArgEvalPolicy) {
super(isReferenceBoolCounted, isBlankCounted);
setMissingArgPolicy(missingArgEvalPolicy);
}
@Override

View File

@ -0,0 +1,70 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.ss.formula.functions;
import org.apache.poi.ss.formula.eval.BoolEval;
import org.apache.poi.ss.formula.eval.MissingArgEval;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.StringEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TestProduct {
@Test
public void missingArgsAreIgnored() {
ValueEval result = getInstance().evaluate(new ValueEval[]{new NumberEval(2.0), MissingArgEval.instance}, 0, 0);
assertTrue(result instanceof NumberEval);
assertEquals(2, ((NumberEval)result).getNumberValue(), 0);
}
/**
* Actually PRODUCT() requires at least one arg but the checks are performed elsewhere.
* However, PRODUCT(,) is a valid call (which should return 0). So it makes sense to
* assert that PRODUCT() is also 0 (at least, nothing explodes).
*/
public void missingArgEvalReturns0() {
ValueEval result = getInstance().evaluate(new ValueEval[0], 0, 0);
assertTrue(result instanceof NumberEval);
assertEquals(0, ((NumberEval)result).getNumberValue(), 0);
}
@Test
public void twoMissingArgEvalsReturn0() {
ValueEval result = getInstance().evaluate(new ValueEval[]{MissingArgEval.instance, MissingArgEval.instance}, 0, 0);
assertTrue(result instanceof NumberEval);
assertEquals(0, ((NumberEval)result).getNumberValue(), 0);
}
@Test
public void acceptanceTest() {
final ValueEval[] args = {
new NumberEval(2.0),
MissingArgEval.instance,
new StringEval("6"),
BoolEval.TRUE};
ValueEval result = getInstance().evaluate(args, 0, 0);
assertTrue(result instanceof NumberEval);
assertEquals(12, ((NumberEval)result).getNumberValue(), 0);
}
private static Function getInstance() {
return AggregateFunction.PRODUCT;
}
}