|
@@ -6,6 +6,7 @@
|
|
|
|
|
|
package org.elasticsearch.xpack.analytics.ttest;
|
|
|
|
|
|
+import org.apache.lucene.document.IntPoint;
|
|
|
import org.apache.lucene.document.NumericDocValuesField;
|
|
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
|
|
import org.apache.lucene.index.DirectoryReader;
|
|
@@ -21,6 +22,7 @@ import org.elasticsearch.index.fielddata.ScriptDocValues;
|
|
|
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
|
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
|
|
import org.elasticsearch.index.mapper.NumberFieldMapper;
|
|
|
+import org.elasticsearch.index.query.QueryBuilders;
|
|
|
import org.elasticsearch.script.MockScriptEngine;
|
|
|
import org.elasticsearch.script.Script;
|
|
|
import org.elasticsearch.script.ScriptEngine;
|
|
@@ -59,11 +61,15 @@ public class TTestAggregatorTests extends AggregatorTestCase {
|
|
|
*/
|
|
|
public static final String ADD_HALF_SCRIPT = "add_one";
|
|
|
|
|
|
+ public static final String TERM_FILTERING = "term_filtering";
|
|
|
+
|
|
|
@Override
|
|
|
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
|
|
|
return new TTestAggregationBuilder("foo")
|
|
|
- .a(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName).build())
|
|
|
- .b(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName).build());
|
|
|
+ .a(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName)
|
|
|
+ .setFilter(QueryBuilders.rangeQuery(fieldName).lt(10)).build())
|
|
|
+ .b(new MultiValuesSourceFieldConfig.Builder().setFieldName(fieldName)
|
|
|
+ .setFilter(QueryBuilders.rangeQuery(fieldName).gte(10)).build());
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -79,11 +85,18 @@ public class TTestAggregatorTests extends AggregatorTestCase {
|
|
|
LeafDocLookup leafDocLookup = (LeafDocLookup) vars.get("doc");
|
|
|
String fieldname = (String) vars.get("fieldname");
|
|
|
ScriptDocValues<?> scriptDocValues = leafDocLookup.get(fieldname);
|
|
|
- double val = ((Number) scriptDocValues.get(0)).doubleValue();
|
|
|
- if (val == 1) {
|
|
|
- val += 0.0000001;
|
|
|
+ return ((Number) scriptDocValues.get(0)).doubleValue() + 0.5;
|
|
|
+ });
|
|
|
+
|
|
|
+ scripts.put(TERM_FILTERING, vars -> {
|
|
|
+ LeafDocLookup leafDocLookup = (LeafDocLookup) vars.get("doc");
|
|
|
+ int term = (Integer) vars.get("term");
|
|
|
+ ScriptDocValues<?> termDocValues = leafDocLookup.get("term");
|
|
|
+ int currentTerm = ((Number) termDocValues.get(0)).intValue();
|
|
|
+ if (currentTerm == term) {
|
|
|
+ return ((Number) leafDocLookup.get("field").get(0)).doubleValue();
|
|
|
}
|
|
|
- return val + 0.5;
|
|
|
+ return null;
|
|
|
});
|
|
|
|
|
|
MockScriptEngine scriptEngine = new MockScriptEngine(MockScriptEngine.NAME,
|
|
@@ -142,6 +155,26 @@ public class TTestAggregatorTests extends AggregatorTestCase {
|
|
|
ex.getMessage());
|
|
|
}
|
|
|
|
|
|
+ public void testSameFieldAndNoFilters() {
|
|
|
+ TTestType tTestType = randomFrom(TTestType.values());
|
|
|
+ MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
|
|
+ fieldType.setName("field");
|
|
|
+ TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
|
|
|
+ .a(new MultiValuesSourceFieldConfig.Builder().setFieldName("field").setMissing(100).build())
|
|
|
+ .b(new MultiValuesSourceFieldConfig.Builder().setFieldName("field").setMissing(100).build())
|
|
|
+ .testType(tTestType);
|
|
|
+
|
|
|
+ IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () ->
|
|
|
+ testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
|
|
+ iw.addDocument(singleton(new SortedNumericDocValuesField("field", 102)));
|
|
|
+ iw.addDocument(singleton(new SortedNumericDocValuesField("field", 99)));
|
|
|
+ }, tTest -> fail("Should have thrown exception"), fieldType)
|
|
|
+ );
|
|
|
+ assertEquals(
|
|
|
+ "The same field [field] is used for both population but no filters are specified.",
|
|
|
+ ex.getMessage());
|
|
|
+ }
|
|
|
+
|
|
|
public void testMultipleUnpairedValues() throws IOException {
|
|
|
TTestType tTestType = randomFrom(TTestType.HETEROSCEDASTIC, TTestType.HOMOSCEDASTIC);
|
|
|
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
|
|
@@ -151,6 +184,15 @@ public class TTestAggregatorTests extends AggregatorTestCase {
|
|
|
}, tTest -> assertEquals(tTestType == TTestType.HETEROSCEDASTIC ? 0.0607303911 : 0.01718374671, tTest.getValue(), 0.000001));
|
|
|
}
|
|
|
|
|
|
+ public void testUnpairedValuesWithFilters() throws IOException {
|
|
|
+ TTestType tTestType = randomFrom(TTestType.HETEROSCEDASTIC, TTestType.HOMOSCEDASTIC);
|
|
|
+ testCase(new MatchAllDocsQuery(), tTestType, iw -> {
|
|
|
+ iw.addDocument(asList(new SortedNumericDocValuesField("a", 102), new SortedNumericDocValuesField("a", 103),
|
|
|
+ new SortedNumericDocValuesField("b", 89)));
|
|
|
+ iw.addDocument(asList(new SortedNumericDocValuesField("a", 99), new SortedNumericDocValuesField("b", 93)));
|
|
|
+ }, tTest -> assertEquals(tTestType == TTestType.HETEROSCEDASTIC ? 0.0607303911 : 0.01718374671, tTest.getValue(), 0.000001));
|
|
|
+ }
|
|
|
+
|
|
|
public void testMissingValues() throws IOException {
|
|
|
TTestType tTestType = randomFrom(TTestType.values());
|
|
|
testCase(new MatchAllDocsQuery(), tTestType, iw -> {
|
|
@@ -434,12 +476,12 @@ public class TTestAggregatorTests extends AggregatorTestCase {
|
|
|
a(fieldInA ? a : b).b(fieldInA ? b : a).testType(tTestType);
|
|
|
|
|
|
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
|
|
- iw.addDocument(singleton(new NumericDocValuesField("field", 1)));
|
|
|
- iw.addDocument(singleton(new NumericDocValuesField("field", 2)));
|
|
|
- iw.addDocument(singleton(new NumericDocValuesField("field", 3)));
|
|
|
- }, (Consumer<InternalTTest>) tTest -> {
|
|
|
- assertEquals(tTestType == TTestType.PAIRED ? 0 : 0.5733922538, tTest.getValue(), 0.000001);
|
|
|
- }, fieldType);
|
|
|
+ iw.addDocument(singleton(new NumericDocValuesField("field", 1)));
|
|
|
+ iw.addDocument(singleton(new NumericDocValuesField("field", 2)));
|
|
|
+ iw.addDocument(singleton(new NumericDocValuesField("field", 3)));
|
|
|
+ }, (Consumer<InternalTTest>) tTest -> {
|
|
|
+ assertEquals(tTestType == TTestType.PAIRED ? 0 : 0.5733922538, tTest.getValue(), 0.000001);
|
|
|
+ }, fieldType);
|
|
|
}
|
|
|
|
|
|
public void testPaired() throws IOException {
|
|
@@ -492,7 +534,6 @@ public class TTestAggregatorTests extends AggregatorTestCase {
|
|
|
}, fieldType1, fieldType2);
|
|
|
}
|
|
|
|
|
|
-
|
|
|
public void testHeteroscedastic() throws IOException {
|
|
|
MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
|
|
fieldType1.setName("a");
|
|
@@ -520,6 +561,93 @@ public class TTestAggregatorTests extends AggregatorTestCase {
|
|
|
}, fieldType1, fieldType2);
|
|
|
}
|
|
|
|
|
|
+ public void testFiltered() throws IOException {
|
|
|
+ TTestType tTestType = randomFrom(TTestType.values());
|
|
|
+ MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
|
|
+ fieldType1.setName("a");
|
|
|
+ MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
|
|
+ fieldType2.setName("b");
|
|
|
+ TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test")
|
|
|
+ .a(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").setFilter(QueryBuilders.termQuery("b", 1)).build())
|
|
|
+ .b(new MultiValuesSourceFieldConfig.Builder().setFieldName("a").setFilter(QueryBuilders.termQuery("b", 2)).build())
|
|
|
+ .testType(tTestType);
|
|
|
+ int tails = randomIntBetween(1, 2);
|
|
|
+ if (tails == 1 || randomBoolean()) {
|
|
|
+ aggregationBuilder.tails(tails);
|
|
|
+ }
|
|
|
+ CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 102), new IntPoint("b", 1)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 99), new IntPoint("b", 1)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 111), new IntPoint("b", 1)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 97), new IntPoint("b", 1)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 101), new IntPoint("b", 1)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 99), new IntPoint("b", 1)));
|
|
|
+
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 89), new IntPoint("b", 2)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 93), new IntPoint("b", 2)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 72), new IntPoint("b", 2)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 98), new IntPoint("b", 2)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 102), new IntPoint("b", 2)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 98), new IntPoint("b", 2)));
|
|
|
+
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 189), new IntPoint("b", 3)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 193), new IntPoint("b", 3)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 172), new IntPoint("b", 3)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 198), new IntPoint("b", 3)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 1102), new IntPoint("b", 3)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("a", 198), new IntPoint("b", 3)));
|
|
|
+ };
|
|
|
+ if (tTestType == TTestType.PAIRED) {
|
|
|
+ IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () ->
|
|
|
+ testCase(aggregationBuilder, new MatchAllDocsQuery(), buildIndex, tTest -> fail("Should have thrown exception"),
|
|
|
+ fieldType1, fieldType2)
|
|
|
+ );
|
|
|
+ assertEquals("Paired t-test doesn't support filters", ex.getMessage());
|
|
|
+ } else {
|
|
|
+ testCase(aggregationBuilder, new MatchAllDocsQuery(), buildIndex, (Consumer<InternalTTest>) ttest -> {
|
|
|
+ if (tTestType == TTestType.HOMOSCEDASTIC) {
|
|
|
+ assertEquals(0.03928288693 * tails, ttest.getValue(), 0.00001);
|
|
|
+ } else {
|
|
|
+ assertEquals(0.04538666214 * tails, ttest.getValue(), 0.00001);
|
|
|
+ }
|
|
|
+ }, fieldType1, fieldType2);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testFilterByFilterOrScript() throws IOException {
|
|
|
+ boolean fieldInA = randomBoolean();
|
|
|
+ TTestType tTestType = randomFrom(TTestType.HOMOSCEDASTIC, TTestType.HETEROSCEDASTIC);
|
|
|
+
|
|
|
+ MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
|
|
+ fieldType1.setName("field");
|
|
|
+ MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER);
|
|
|
+ fieldType2.setName("term");
|
|
|
+
|
|
|
+ boolean filterTermOne = randomBoolean();
|
|
|
+
|
|
|
+ MultiValuesSourceFieldConfig.Builder a = new MultiValuesSourceFieldConfig.Builder().setFieldName("field").setFilter(
|
|
|
+ QueryBuilders.termQuery("term", filterTermOne? 1 : 2)
|
|
|
+ );
|
|
|
+ MultiValuesSourceFieldConfig.Builder b = new MultiValuesSourceFieldConfig.Builder().setScript(
|
|
|
+ new Script(ScriptType.INLINE, MockScriptEngine.NAME, TERM_FILTERING, Collections.singletonMap("term", filterTermOne? 2 : 1))
|
|
|
+ );
|
|
|
+
|
|
|
+ TTestAggregationBuilder aggregationBuilder = new TTestAggregationBuilder("t_test").
|
|
|
+ a(fieldInA ? a.build() : b.build()).b(fieldInA ? b.build() : a.build()).testType(tTestType);
|
|
|
+
|
|
|
+ testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("field", 1), new IntPoint("term", 1), new NumericDocValuesField("term", 1)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("field", 2), new IntPoint("term", 1), new NumericDocValuesField("term", 1)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("field", 3), new IntPoint("term", 1), new NumericDocValuesField("term", 1)));
|
|
|
+
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("field", 4), new IntPoint("term", 2), new NumericDocValuesField("term", 2)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("field", 5), new IntPoint("term", 2), new NumericDocValuesField("term", 2)));
|
|
|
+ iw.addDocument(asList(new NumericDocValuesField("field", 6), new IntPoint("term", 2), new NumericDocValuesField("term", 2)));
|
|
|
+ }, (Consumer<InternalTTest>) tTest -> {
|
|
|
+ assertEquals(0.02131164113, tTest.getValue(), 0.000001);
|
|
|
+ }, fieldType1, fieldType2);
|
|
|
+ }
|
|
|
+
|
|
|
private void testCase(Query query, TTestType type,
|
|
|
CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
|
|
|
Consumer<InternalTTest> verify) throws IOException {
|