Implemented GEOMEAN function. Thanks to gallonfizik. This closes #136

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1849042 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2018-12-16 18:51:09 +00:00
parent 8d51a39edd
commit e399507710
5 changed files with 308 additions and 158 deletions

View File

@ -299,7 +299,7 @@ public final class FunctionEval {
// 316: TTEST
// 317: PROB
retval[318] = AggregateFunction.DEVSQ;
// 319: GEOMEAN
retval[319] = AggregateFunction.GEOMEAN;
// 320: HARMEAN
retval[321] = AggregateFunction.SUMSQ;
// 322: KURT

View File

@ -17,6 +17,7 @@
package org.apache.poi.ss.formula.functions;
import org.apache.commons.math3.stat.descriptive.moment.GeometricMean;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.formula.eval.EvaluationException;
import org.apache.poi.ss.formula.eval.NumberEval;
@ -28,45 +29,45 @@ import org.apache.poi.ss.formula.eval.ValueEval;
*/
public abstract class AggregateFunction extends MultiOperandNumericFunction {
private static final class LargeSmall extends Fixed2ArgFunction {
private final boolean _isLarge;
protected LargeSmall(boolean isLarge) {
_isLarge = isLarge;
}
private static final class LargeSmall extends Fixed2ArgFunction {
private final boolean _isLarge;
protected LargeSmall(boolean isLarge) {
_isLarge = isLarge;
}
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, ValueEval arg0,
ValueEval arg1) {
double dn;
try {
ValueEval ve1 = OperandResolver.getSingleValue(arg1, srcRowIndex, srcColumnIndex);
dn = OperandResolver.coerceValueToDouble(ve1);
} catch (EvaluationException e1) {
// all errors in the second arg translate to #VALUE!
return ErrorEval.VALUE_INVALID;
}
// weird Excel behaviour on second arg
if (dn < 1.0) {
// values between 0.0 and 1.0 result in #NUM!
return ErrorEval.NUM_ERROR;
}
// all other values are rounded up to the next integer
int k = (int) Math.ceil(dn);
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, ValueEval arg0,
ValueEval arg1) {
double dn;
try {
ValueEval ve1 = OperandResolver.getSingleValue(arg1, srcRowIndex, srcColumnIndex);
dn = OperandResolver.coerceValueToDouble(ve1);
} catch (EvaluationException e1) {
// all errors in the second arg translate to #VALUE!
return ErrorEval.VALUE_INVALID;
}
// weird Excel behaviour on second arg
if (dn < 1.0) {
// values between 0.0 and 1.0 result in #NUM!
return ErrorEval.NUM_ERROR;
}
// all other values are rounded up to the next integer
int k = (int) Math.ceil(dn);
double result;
try {
double[] ds = ValueCollector.collectValues(arg0);
if (k > ds.length) {
return ErrorEval.NUM_ERROR;
}
result = _isLarge ? StatsLib.kthLargest(ds, k) : StatsLib.kthSmallest(ds, k);
NumericFunction.checkValue(result);
} catch (EvaluationException e) {
return e.getErrorEval();
}
double result;
try {
double[] ds = ValueCollector.collectValues(arg0);
if (k > ds.length) {
return ErrorEval.NUM_ERROR;
}
result = _isLarge ? StatsLib.kthLargest(ds, k) : StatsLib.kthSmallest(ds, k);
NumericFunction.checkValue(result);
} catch (EvaluationException e) {
return e.getErrorEval();
}
return new NumberEval(result);
}
}
return new NumberEval(result);
}
}
/**
* Returns the k-th percentile of values in a range. You can use this function to establish a threshold of
@ -84,67 +85,67 @@ public abstract class AggregateFunction extends MultiOperandNumericFunction {
* <li>If k is not a multiple of 1/(n - 1), PERCENTILE interpolates to determine the value at the k-th percentile.</li>
* </ul>
*/
private static final class Percentile extends Fixed2ArgFunction {
protected Percentile() {
}
private static final class Percentile extends Fixed2ArgFunction {
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, ValueEval arg0,
ValueEval arg1) {
double dn;
try {
ValueEval ve1 = OperandResolver.getSingleValue(arg1, srcRowIndex, srcColumnIndex);
dn = OperandResolver.coerceValueToDouble(ve1);
} catch (EvaluationException e1) {
// all errors in the second arg translate to #VALUE!
return ErrorEval.VALUE_INVALID;
}
if (dn < 0 || dn > 1) { // has to be percentage
return ErrorEval.NUM_ERROR;
}
protected Percentile() {
}
double result;
try {
double[] ds = ValueCollector.collectValues(arg0);
int N = ds.length;
public ValueEval evaluate(int srcRowIndex, int srcColumnIndex, ValueEval arg0,
ValueEval arg1) {
double dn;
try {
ValueEval ve1 = OperandResolver.getSingleValue(arg1, srcRowIndex, srcColumnIndex);
dn = OperandResolver.coerceValueToDouble(ve1);
} catch (EvaluationException e1) {
// all errors in the second arg translate to #VALUE!
return ErrorEval.VALUE_INVALID;
}
if (dn < 0 || dn > 1) { // has to be percentage
return ErrorEval.NUM_ERROR;
}
if (N == 0 || N > 8191) {
double result;
try {
double[] ds = ValueCollector.collectValues(arg0);
int N = ds.length;
if (N == 0 || N > 8191) {
return ErrorEval.NUM_ERROR;
}
double n = (N - 1) * dn + 1;
if (n == 1d) {
result = StatsLib.kthSmallest(ds, 1);
} else if (Double.compare(n, N) == 0) {
result = StatsLib.kthLargest(ds, 1);
} else {
int k = (int) n;
double d = n - k;
result = StatsLib.kthSmallest(ds, k) + d
* (StatsLib.kthSmallest(ds, k + 1) - StatsLib.kthSmallest(ds, k));
}
double n = (N - 1) * dn + 1;
if (n == 1d) {
result = StatsLib.kthSmallest(ds, 1);
} else if (Double.compare(n, N) == 0) {
result = StatsLib.kthLargest(ds, 1);
} else {
int k = (int) n;
double d = n - k;
result = StatsLib.kthSmallest(ds, k) + d
* (StatsLib.kthSmallest(ds, k + 1) - StatsLib.kthSmallest(ds, k));
}
NumericFunction.checkValue(result);
} catch (EvaluationException e) {
return e.getErrorEval();
}
NumericFunction.checkValue(result);
} catch (EvaluationException e) {
return e.getErrorEval();
}
return new NumberEval(result);
}
}
static final class ValueCollector extends MultiOperandNumericFunction {
private static final ValueCollector instance = new ValueCollector();
public ValueCollector() {
super(false, false);
}
public static double[] collectValues(ValueEval...operands) throws EvaluationException {
return instance.getNumberArray(operands);
}
protected double evaluate(double[] values) {
throw new IllegalStateException("should not be called");
}
}
return new NumberEval(result);
}
}
static final class ValueCollector extends MultiOperandNumericFunction {
private static final ValueCollector instance = new ValueCollector();
public ValueCollector() {
super(false, false);
}
public static double[] collectValues(ValueEval...operands) throws EvaluationException {
return instance.getNumberArray(operands);
}
protected double evaluate(double[] values) {
throw new IllegalStateException("should not be called");
}
}
protected AggregateFunction() {
super(false, false);
@ -181,66 +182,66 @@ public abstract class AggregateFunction extends MultiOperandNumericFunction {
}
public static final Function AVEDEV = new AggregateFunction() {
protected double evaluate(double[] values) {
return StatsLib.avedev(values);
}
};
public static final Function AVERAGE = new AggregateFunction() {
protected double evaluate(double[] values) throws EvaluationException {
if (values.length < 1) {
throw new EvaluationException(ErrorEval.DIV_ZERO);
}
return MathX.average(values);
}
};
public static final Function DEVSQ = new AggregateFunction() {
protected double evaluate(double[] values) {
return StatsLib.devsq(values);
}
};
public static final Function LARGE = new LargeSmall(true);
public static final Function MAX = new AggregateFunction() {
protected double evaluate(double[] values) {
return values.length > 0 ? MathX.max(values) : 0;
}
};
public static final Function MEDIAN = new AggregateFunction() {
protected double evaluate(double[] values) {
return StatsLib.median(values);
}
};
public static final Function MIN = new AggregateFunction() {
protected double evaluate(double[] values) {
return values.length > 0 ? MathX.min(values) : 0;
}
};
public static final Function PERCENTILE = new Percentile();
public static final Function PRODUCT = new AggregateFunction() {
protected double evaluate(double[] values) {
return MathX.product(values);
}
};
public static final Function SMALL = new LargeSmall(false);
public static final Function STDEV = new AggregateFunction() {
protected double evaluate(double[] values) throws EvaluationException {
if (values.length < 1) {
throw new EvaluationException(ErrorEval.DIV_ZERO);
}
return StatsLib.stdev(values);
}
};
public static final Function SUM = new AggregateFunction() {
protected double evaluate(double[] values) {
return MathX.sum(values);
}
};
public static final Function SUMSQ = new AggregateFunction() {
protected double evaluate(double[] values) {
return MathX.sumsq(values);
}
};
protected double evaluate(double[] values) {
return StatsLib.avedev(values);
}
};
public static final Function AVERAGE = new AggregateFunction() {
protected double evaluate(double[] values) throws EvaluationException {
if (values.length < 1) {
throw new EvaluationException(ErrorEval.DIV_ZERO);
}
return MathX.average(values);
}
};
public static final Function DEVSQ = new AggregateFunction() {
protected double evaluate(double[] values) {
return StatsLib.devsq(values);
}
};
public static final Function LARGE = new LargeSmall(true);
public static final Function MAX = new AggregateFunction() {
protected double evaluate(double[] values) {
return values.length > 0 ? MathX.max(values) : 0;
}
};
public static final Function MEDIAN = new AggregateFunction() {
protected double evaluate(double[] values) {
return StatsLib.median(values);
}
};
public static final Function MIN = new AggregateFunction() {
protected double evaluate(double[] values) {
return values.length > 0 ? MathX.min(values) : 0;
}
};
public static final Function PERCENTILE = new Percentile();
public static final Function PRODUCT = new AggregateFunction() {
protected double evaluate(double[] values) {
return MathX.product(values);
}
};
public static final Function SMALL = new LargeSmall(false);
public static final Function STDEV = new AggregateFunction() {
protected double evaluate(double[] values) throws EvaluationException {
if (values.length < 1) {
throw new EvaluationException(ErrorEval.DIV_ZERO);
}
return StatsLib.stdev(values);
}
};
public static final Function SUM = new AggregateFunction() {
protected double evaluate(double[] values) {
return MathX.sum(values);
}
};
public static final Function SUMSQ = new AggregateFunction() {
protected double evaluate(double[] values) {
return MathX.sumsq(values);
}
};
public static final Function VAR = new AggregateFunction() {
protected double evaluate(double[] values) throws EvaluationException {
if (values.length < 1) {
@ -257,4 +258,16 @@ public abstract class AggregateFunction extends MultiOperandNumericFunction {
return StatsLib.varp(values);
}
};
/**
 * Geometric mean of the collected values. Any value that is zero or negative
 * makes the function fail with #NUM!.
 */
public static final Function GEOMEAN = new AggregateFunction() {
    @Override
    protected double evaluate(double[] values) throws EvaluationException {
        final int count = values.length;
        // Reject non-positive data up front: the library implementation would
        // quietly return 0 for an input sequence like [1, 0].
        for (int i = 0; i < count; i++) {
            if (values[i] <= 0) {
                throw new EvaluationException(ErrorEval.NUM_ERROR);
            }
        }
        final GeometricMean geometricMean = new GeometricMean();
        return geometricMean.evaluate(values, 0, count);
    }
};
}

View File

@ -24,6 +24,7 @@ import org.apache.poi.ss.formula.eval.BlankEval;
import org.apache.poi.ss.formula.eval.BoolEval;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.formula.eval.EvaluationException;
import org.apache.poi.ss.formula.eval.MissingArgEval;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.NumericValueEval;
import org.apache.poi.ss.formula.eval.OperandResolver;
@ -85,19 +86,16 @@ public abstract class MultiOperandNumericFunction implements Function {
private static final int DEFAULT_MAX_NUM_OPERANDS = SpreadsheetVersion.EXCEL2007.getMaxFunctionArgs();
/**
 * Collects the numeric values of the operands, hands them to
 * {@link #evaluate(double[])} and wraps the outcome.
 *
 * @param args the operands of the function call
 * @param srcCellRow row index of the cell being evaluated (not used here)
 * @param srcCellCol column index of the cell being evaluated (not used here)
 * @return a {@link NumberEval} holding the result, #NUM! when the result is
 *         NaN or infinite, or the error carried by an {@link EvaluationException}
 *         raised while collecting or evaluating the values
 */
public final ValueEval evaluate(ValueEval[] args, int srcCellRow, int srcCellCol) {
    try {
        double[] values = getNumberArray(args);
        double d = evaluate(values);
        // a result that is not a finite number cannot be represented as a cell value
        if (Double.isNaN(d) || Double.isInfinite(d)) {
            return ErrorEval.NUM_ERROR;
        }
        return new NumberEval(d);
    } catch (EvaluationException e) {
        return e.getErrorEval();
    }
}
protected abstract double evaluate(double[] values) throws EvaluationException;
@ -217,6 +215,10 @@ public abstract class MultiOperandNumericFunction implements Function {
}
return;
}
if (ve == MissingArgEval.instance) {
temp.add(0.0);
return;
}
throw new RuntimeException("Invalid ValueEval type passed for conversion: ("
+ ve.getClass() + ")");
}

View File

@ -0,0 +1,112 @@
package org.apache.poi.ss.formula.functions;
import org.apache.poi.ss.formula.eval.*;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* From Excel documentation at https://support.office.com/en-us/article/geomean-function-db1ac48d-25a5-40a0-ab83-0b38980e40d5:
* 1. Arguments can either be numbers or names, arrays, or references that contain numbers.
* 2. Logical values and text representations of numbers that you type directly into the list of arguments are counted.
* 3. If an array or reference argument contains text, logical values, or empty cells, those values are ignored; however, cells with the value zero are included.
* 4. Arguments that are error values or text that cannot be translated into numbers cause errors.
* 5. If any data point is less than or equal to 0, GEOMEAN returns the #NUM! error value.
*
* Remarks:
* Actually, 5. is not true. If an error is encountered before a 0 value, the error is returned.
*/
public class TestGeomean {
    /** Absolute tolerance applied when comparing numeric results. */
    private static final double TOLERANCE = 1e-15;

    @Test
    public void acceptanceTest() {
        assertNumber(2.449489742783178, geomean(new NumberEval(2), new NumberEval(3)));
    }

    @Test
    public void booleansByValueAreCoerced() {
        assertNumber(1.0, geomean(BoolEval.TRUE));
    }

    @Test
    public void stringsByValueAreCoerced() {
        assertNumber(2.0, geomean(new StringEval("2")));
    }

    @Test
    public void nonCoerceableStringsByValueCauseValueInvalid() {
        assertEquals(ErrorEval.VALUE_INVALID, geomean(new StringEval("foo")));
    }

    @Test
    public void booleansByReferenceAreSkipped() {
        ValueEval boolRef = EvalFactory.createRefEval("A1", BoolEval.TRUE);
        assertNumber(2.0, geomean(new NumberEval(2.0), boolRef));
    }

    @Test
    public void booleansStringsAndBlanksByReferenceAreSkipped() {
        ValueEval[] cells = {new StringEval("foo"), BoolEval.FALSE, BlankEval.instance};
        ValueEval area = EvalFactory.createAreaEval("A1:A3", cells);
        assertNumber(2.0, geomean(area, new NumberEval(2.0)));
    }

    @Test
    public void stringsByValueAreCounted() {
        assertNumber(2.0, geomean(new StringEval("2.0")));
    }

    @Test
    public void missingArgCountAsZero() {
        // and, naturally, produces a NUM_ERROR
        assertEquals(ErrorEval.NUM_ERROR, geomean(new NumberEval(1.0), MissingArgEval.instance));
    }

    /**
     * Implementation-specific: the math lib returns 0 for the input [1.0, 0.0], but a NUM_ERROR should be returned.
     */
    @Test
    public void sequence_1_0_shouldReturnError() {
        assertEquals(ErrorEval.NUM_ERROR, geomean(new NumberEval(1.0), new NumberEval(0)));
    }

    @Test
    public void minusOneShouldReturnError() {
        assertEquals(ErrorEval.NUM_ERROR, geomean(new NumberEval(1.0), new NumberEval(-1.0)));
    }

    @Test
    public void firstErrorPropagates() {
        assertEquals(ErrorEval.DIV_ZERO, geomean(ErrorEval.DIV_ZERO, ErrorEval.NUM_ERROR));
    }

    /** Evaluates GEOMEAN over the given operands, as if called from cell (0, 0). */
    private static ValueEval geomean(ValueEval... operands) {
        return AggregateFunction.GEOMEAN.evaluate(operands, 0, 0);
    }

    /** Asserts that {@code actual} is a {@link NumberEval} holding {@code expected}. */
    private static void assertNumber(double expected, ValueEval actual) {
        assertTrue(actual instanceof NumberEval);
        assertEquals(expected, ((NumberEval) actual).getNumberValue(), TOLERANCE);
    }
}

View File

@ -18,9 +18,13 @@
package org.apache.poi.ss.formula.functions;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.formula.eval.EvaluationException;
import org.apache.poi.ss.formula.eval.MissingArgEval;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.junit.Test;
public class TestMultiOperandNumericFunction {
@ -36,4 +40,23 @@ public class TestMultiOperandNumericFunction {
};
assertEquals(SpreadsheetVersion.EXCEL2007.getMaxFunctionArgs(), fun.getMaxNumOperands());
}
/** A lone missing argument must reach the function as the number 0. */
@Test
public void missingArgEvalsAreCountedAsZero() {
    final ValueEval[] operands = {MissingArgEval.instance};
    final ValueEval outcome = new Stub(true, true).evaluate(operands, 0, 0);
    assertTrue(outcome instanceof NumberEval);
    final NumberEval number = (NumberEval) outcome;
    assertEquals(0.0, number.getNumberValue(), 0);
}
/**
 * Minimal concrete function for these tests: it reports the first collected
 * value unchanged, so a test can observe what the operand conversion produced.
 */
private static class Stub extends MultiOperandNumericFunction {
    protected Stub(boolean countReferencedBooleans, boolean countBlanks) {
        super(countReferencedBooleans, countBlanks);
    }

    @Override
    protected double evaluate(double[] values) throws EvaluationException {
        final double first = values[0];
        return first;
    }
}
}