1
0
Эх сурвалжийг харах

Scripting: Add Multi-Valued Field Methods to Expressions

Add methods to operate on multi-valued fields in the expressions language.
Note that users will still not be able to access individual values
within a multi-valued field.

The following methods will be included:

* min
* max
* avg
* median
* count
* sum

Additionally, changes have been made to MultiValueMode to support the
new median method.

closes #11105
Jack Conradson 10 жил өмнө
parent
commit
a5c0ac0d67

+ 2 - 0
docs/reference/search/request/sort.asciidoc

@@ -50,6 +50,8 @@ to. The `mode` option can have the following values:
         number based array fields.
 `avg`:: Use the average of all values as sort value. Only applicable
         for number based array fields.
+`median`:: Use the median of all values as sort value. Only applicable
+           for number based array fields.
 
 ===== Sort mode example usage
 

+ 44 - 0
src/main/java/org/elasticsearch/script/expression/CountMethodFunctionValues.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.script.expression;
+
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
+import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
+import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
+
+/**
+ * FunctionValues that report, for each document, how many values a numeric field holds.
+ */
+public class CountMethodFunctionValues extends DoubleDocValues {
+    /** Double view of the field data; repositioned on every {@link #doubleVal(int)} call. */
+    private final SortedNumericDoubleValues values;
+
+    /**
+     * @param parent    the value source creating this instance; handed to the superclass
+     * @param fieldData numeric field data whose double values are counted
+     */
+    CountMethodFunctionValues(ValueSource parent, AtomicNumericFieldData fieldData) {
+        super(parent);
+
+        values = fieldData.getDoubleValues();
+    }
+
+    /**
+     * Positions the values on {@code doc} and returns the number of values found there.
+     *
+     * @param doc the document id to inspect
+     * @return the value count for {@code doc}, widened to a double
+     */
+    @Override
+    public double doubleVal(int doc) {
+        values.setDocument(doc);
+        return values.count();
+    }
+}

+ 73 - 0
src/main/java/org/elasticsearch/script/expression/CountMethodValueSource.java

@@ -0,0 +1,73 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.script.expression;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.elasticsearch.index.fielddata.AtomicFieldData;
+import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.search.MultiValueMode;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * A ValueSource that creates FunctionValues returning the number of values a field holds for a document.
+ */
+public class CountMethodValueSource extends ValueSource {
+    /** Field data the counts are read from; never null. */
+    protected IndexFieldData<?> fieldData;
+
+    /**
+     * @param fieldData the field data to count values of
+     * @throws NullPointerException if {@code fieldData} is null
+     */
+    protected CountMethodValueSource(IndexFieldData<?> fieldData) {
+        Objects.requireNonNull(fieldData);
+
+        this.fieldData = fieldData;
+    }
+
+    /**
+     * Loads the field data for the given leaf and wraps it in a {@link CountMethodFunctionValues}.
+     */
+    @Override
+    public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
+        AtomicFieldData leafData = fieldData.load(leaf);
+        // Only verified when assertions are enabled; otherwise the cast below is the guard.
+        assert(leafData instanceof AtomicNumericFieldData);
+
+        return new CountMethodFunctionValues(this, (AtomicNumericFieldData)leafData);
+    }
+
+    /**
+     * Two sources are equal when they are of the same class and wrap equal field data.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        // The class check above guarantees this cast; casting to any other type
+        // (such as FieldDataValueSource) would throw ClassCastException here.
+        CountMethodValueSource that = (CountMethodValueSource) o;
+
+        return fieldData.equals(that.fieldData);
+    }
+
+    @Override
+    public int hashCode() {
+        return fieldData.hashCode();
+    }
+
+    /**
+     * @return a description of the form {@code count: field(<field names>)}
+     */
+    @Override
+    public String description() {
+        return "count: field(" + fieldData.getFieldNames().toString() + ")";
+    }
+}

+ 3 - 2
src/main/java/org/elasticsearch/script/expression/DateMethodFunctionValues.java

@@ -25,13 +25,14 @@ import java.util.TimeZone;
 
 import org.apache.lucene.queries.function.ValueSource;
 import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
+import org.elasticsearch.search.MultiValueMode;
 
 class DateMethodFunctionValues extends FieldDataFunctionValues {
     private final int calendarType;
     private final Calendar calendar;
 
-    DateMethodFunctionValues(ValueSource parent, AtomicNumericFieldData data, int calendarType) {
-        super(parent, data);
+    DateMethodFunctionValues(ValueSource parent, MultiValueMode multiValueMode,  AtomicNumericFieldData data, int calendarType) {
+        super(parent, multiValueMode, data);
 
         this.calendarType = calendarType;
         calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);

+ 16 - 15
src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java

@@ -29,14 +29,15 @@ import org.apache.lucene.queries.function.FunctionValues;
 import org.elasticsearch.index.fielddata.AtomicFieldData;
 import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
 import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.search.MultiValueMode;
 
 class DateMethodValueSource extends FieldDataValueSource {
 
     protected final String methodName;
     protected final int calendarType;
 
-    DateMethodValueSource(IndexFieldData<?> indexFieldData, String methodName, int calendarType) {
-        super(indexFieldData);
+    DateMethodValueSource(IndexFieldData<?> indexFieldData, MultiValueMode multiValueMode, String methodName, int calendarType) {
+        super(indexFieldData, multiValueMode);
 
         Objects.requireNonNull(methodName);
 
@@ -44,6 +45,19 @@ class DateMethodValueSource extends FieldDataValueSource {
         this.calendarType = calendarType;
     }
 
+    /**
+     * Loads this source's field data for the given leaf and wraps it in a
+     * {@link DateMethodFunctionValues} built with this source's multi-value mode and calendar type.
+     */
+    @Override
+    public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
+        AtomicFieldData leafData = fieldData.load(leaf);
+        // Only verified when assertions are enabled; otherwise the cast below is the guard.
+        assert(leafData instanceof AtomicNumericFieldData);
+
+        return new DateMethodFunctionValues(this, multiValueMode, (AtomicNumericFieldData)leafData, calendarType);
+    }
+
+    /**
+     * @return a description of the form {@code <methodName>: field(<field names>)}
+     */
+    @Override
+    public String description() {
+        return methodName + ": field(" + fieldData.getFieldNames().toString() + ")";
+    }
+
     @Override
     public boolean equals(Object o) {
         if (this == o) return true;
@@ -64,17 +78,4 @@ class DateMethodValueSource extends FieldDataValueSource {
         result = 31 * result + calendarType;
         return result;
     }
-
-    @Override
-    public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
-        AtomicFieldData leafData = fieldData.load(leaf);
-        assert(leafData instanceof AtomicNumericFieldData);
-
-        return new DateMethodFunctionValues(this, (AtomicNumericFieldData)leafData, calendarType);
-    }
-
-    @Override
-    public String description() {
-        return methodName + ": field(" + fieldData.getFieldNames().toString() + ")";
-    }
 }

+ 22 - 2
src/main/java/org/elasticsearch/script/expression/ExpressionScriptEngineService.java

@@ -39,6 +39,7 @@ import org.elasticsearch.script.CompiledScript;
 import org.elasticsearch.script.ExecutableScript;
 import org.elasticsearch.script.ScriptEngineService;
 import org.elasticsearch.script.SearchScript;
+import org.elasticsearch.search.MultiValueMode;
 import org.elasticsearch.search.lookup.SearchLookup;
 
 import java.text.ParseException;
@@ -60,6 +61,13 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
     protected static final String GET_MINUTES_METHOD      = "getMinutes";
     protected static final String GET_SECONDS_METHOD      = "getSeconds";
 
+    protected static final String MINIMUM_METHOD          = "min";
+    protected static final String MAXIMUM_METHOD          = "max";
+    protected static final String AVERAGE_METHOD          = "avg";
+    protected static final String MEDIAN_METHOD           = "median";
+    protected static final String SUM_METHOD              = "sum";
+    protected static final String COUNT_METHOD            = "count";
+
     @Inject
     public ExpressionScriptEngineService(Settings settings) {
         super(settings);
@@ -156,7 +164,7 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
 
                 IndexFieldData<?> fieldData = lookup.doc().fieldDataService().getForField((NumberFieldMapper)field);
                 if (methodname == null) {
-                    bindings.add(variable, new FieldDataValueSource(fieldData));
+                    bindings.add(variable, new FieldDataValueSource(fieldData, MultiValueMode.MIN));
                 } else {
                     bindings.add(variable, getMethodValueSource(field, fieldData, fieldname, methodname));
                 }
@@ -180,6 +188,18 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
                 return getDateMethodValueSource(field, fieldData, fieldName, methodName, Calendar.MINUTE);
             case GET_SECONDS_METHOD:
                 return getDateMethodValueSource(field, fieldData, fieldName, methodName, Calendar.SECOND);
+            case MINIMUM_METHOD:
+                return new FieldDataValueSource(fieldData, MultiValueMode.MIN);
+            case MAXIMUM_METHOD:
+                return new FieldDataValueSource(fieldData, MultiValueMode.MAX);
+            case AVERAGE_METHOD:
+                return new FieldDataValueSource(fieldData, MultiValueMode.AVG);
+            case MEDIAN_METHOD:
+                return new FieldDataValueSource(fieldData, MultiValueMode.MEDIAN);
+            case SUM_METHOD:
+                return new FieldDataValueSource(fieldData, MultiValueMode.SUM);
+            case COUNT_METHOD:
+                return new CountMethodValueSource(fieldData);
             default:
                 throw new IllegalArgumentException("Member method [" + methodName + "] does not exist.");
         }
@@ -190,7 +210,7 @@ public class ExpressionScriptEngineService extends AbstractComponent implements
             throw new IllegalArgumentException("Member method [" + methodName + "] can only be used with a date field type, not the field [" + fieldName + "].");
         }
 
-        return new DateMethodValueSource(fieldData, methodName, calendarType);
+        return new DateMethodValueSource(fieldData, MultiValueMode.MIN, methodName, calendarType);
     }
 
     @Override

+ 2 - 2
src/main/java/org/elasticsearch/script/expression/FieldDataFunctionValues.java

@@ -31,9 +31,9 @@ import org.elasticsearch.search.MultiValueMode;
 class FieldDataFunctionValues extends DoubleDocValues {
     NumericDoubleValues dataAccessor;
 
-    FieldDataFunctionValues(ValueSource parent, AtomicNumericFieldData d) {
+    FieldDataFunctionValues(ValueSource parent, MultiValueMode m, AtomicNumericFieldData d) {
         super(parent);
-        dataAccessor = MultiValueMode.MIN.select(d.getDoubleValues(), 0d);
+        dataAccessor = m.select(d.getDoubleValues(), 0d);
     }
 
     @Override

+ 18 - 10
src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java

@@ -25,6 +25,7 @@ import org.apache.lucene.queries.function.ValueSource;
 import org.elasticsearch.index.fielddata.AtomicFieldData;
 import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
 import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.search.MultiValueMode;
 
 import java.io.IOException;
 import java.util.Map;
@@ -36,18 +37,14 @@ import java.util.Objects;
 class FieldDataValueSource extends ValueSource {
 
     protected IndexFieldData<?> fieldData;
+    protected MultiValueMode multiValueMode;
 
-    protected FieldDataValueSource(IndexFieldData<?> d) {
+    protected FieldDataValueSource(IndexFieldData<?> d, MultiValueMode m) {
         Objects.requireNonNull(d);
+        Objects.requireNonNull(m);
 
         fieldData = d;
-    }
-
-    @Override
-    public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
-        AtomicFieldData leafData = fieldData.load(leaf);
-        assert(leafData instanceof AtomicNumericFieldData);
-        return new FieldDataFunctionValues(this, (AtomicNumericFieldData)leafData);
+        multiValueMode = m;
     }
 
     @Override
@@ -57,12 +54,23 @@ class FieldDataValueSource extends ValueSource {
 
         FieldDataValueSource that = (FieldDataValueSource) o;
 
-        return fieldData.equals(that.fieldData);
+        if (!fieldData.equals(that.fieldData)) return false;
+        return multiValueMode == that.multiValueMode;
+
     }
 
     @Override
     public int hashCode() {
-        return fieldData.hashCode();
+        int result = fieldData.hashCode();
+        result = 31 * result + multiValueMode.hashCode();
+        return result;
+    }
+
+    /**
+     * Loads this source's field data for the given leaf and wraps it in a
+     * {@link FieldDataFunctionValues} that uses this source's multi-value mode.
+     */
+    @Override
+    public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
+        AtomicFieldData leafData = fieldData.load(leaf);
+        // Only verified when assertions are enabled; otherwise the cast below is the guard.
+        assert(leafData instanceof AtomicNumericFieldData);
+        return new FieldDataFunctionValues(this, multiValueMode, (AtomicNumericFieldData)leafData);
+    }
 
     @Override

+ 47 - 2
src/main/java/org/elasticsearch/search/MultiValueMode.java

@@ -20,6 +20,7 @@
 
 package org.elasticsearch.search;
 
+import javafx.collections.transformation.SortedList;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -105,6 +106,46 @@ public enum MultiValueMode {
         }
     },
 
+    /**
+     * Median of the values.
+     *
+     * Note that apply/reduce do not work with MED since median cannot be derived from
+     * an accumulator algorithm without using internal memory.
+     */
+    MEDIAN {
+        @Override
+        protected long pick(SortedNumericDocValues values, long missingValue) {
+            int count = values.count();
+            if (count > 0) {
+                if (count % 2 == 0) {
+                    count /= 2;
+                    return (values.valueAt(count - 1) + values.valueAt(count))/2;
+                } else {
+                    count /= 2;
+                    return values.valueAt(count);
+                }
+            } else {
+                return missingValue;
+            }
+        }
+
+        @Override
+        protected double pick(SortedNumericDoubleValues values, double missingValue) {
+            int count = values.count();
+            if (count > 0) {
+                if (count % 2 == 0) {
+                    count /= 2;
+                    return (values.valueAt(count - 1) + values.valueAt(count))/2;
+                } else {
+                    count /= 2;
+                    return values.valueAt(count);
+                }
+            } else {
+                return missingValue;
+            }
+        }
+    },
+
     /**
      * Pick the lowest value.
      */
@@ -288,7 +329,9 @@ public enum MultiValueMode {
      * @param b another argument
      * @return the result of the function.
      */
-    public abstract double apply(double a, double b);
+    public double apply(double a, double b) {
+        // Default for modes that do not override this method (MEDIAN is documented as not supporting apply).
+        throw new UnsupportedOperationException();
+    }
 
     /**
      * Applies the sort mode and returns the result. This method is meant to be
@@ -302,7 +345,9 @@ public enum MultiValueMode {
      * @param b another argument
      * @return the result of the function.
      */
-    public abstract long apply(long a, long b);
+    public long apply(long a, long b) {
+        // Default for modes that do not override this method (MEDIAN is documented as not supporting apply).
+        throw new UnsupportedOperationException();
+    }
 
     public int applyOrd(int ord1, int ord2) {
         throw new UnsupportedOperationException();

+ 66 - 0
src/test/java/org/elasticsearch/script/expression/ExpressionScriptTests.java

@@ -125,6 +125,72 @@ public class ExpressionScriptTests extends ElasticsearchIntegrationTest {
         assertEquals(1983.0, hits.getAt(1).field("foo").getValue());
     }
 
+    /**
+     * Exercises the multi-valued field methods (count, sum, avg, median, min, max) of the
+     * expression language against three documents holding one or more values per field.
+     * NOTE(review): the per-hit expectations assume buildRequest returns hits in document
+     * id order -- confirm against buildRequest.
+     */
+    public void testMultiValueMethods() throws Exception {
+        ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("doc", "double0", "type=double", "double1", "type=double"));
+        ensureGreen("test");
+        // A field name repeated within one source supplies several values for that field:
+        //   doc 1: double0 = {5.0, 1.0, 1.5},       double1 = {1.2, 2.4}
+        //   doc 2: double0 = {5.0},                 double1 = {3.0}
+        //   doc 3: double0 = {5.0, 1.0, 1.5, -1.5}, double1 = {4.0}
+        indexRandom(true,
+                client().prepareIndex("test", "doc", "1").setSource("double0", "5.0", "double0", "1.0", "double0", "1.5", "double1", "1.2", "double1", "2.4"),
+                client().prepareIndex("test", "doc", "2").setSource("double0", "5.0", "double1", "3.0"),
+                client().prepareIndex("test", "doc", "3").setSource("double0", "5.0", "double0", "1.0", "double0", "1.5", "double0", "-1.5", "double1", "4.0"));
+
+
+        // count: 3 + 2, 1 + 1, 4 + 1
+        SearchResponse rsp = buildRequest("doc['double0'].count() + doc['double1'].count()").get();
+        assertSearchResponse(rsp);
+        SearchHits hits = rsp.getHits();
+        assertEquals(3, hits.getTotalHits());
+        assertEquals(5.0, hits.getAt(0).field("foo").getValue());
+        assertEquals(2.0, hits.getAt(1).field("foo").getValue());
+        assertEquals(5.0, hits.getAt(2).field("foo").getValue());
+
+        // sum of double0 per document
+        rsp = buildRequest("doc['double0'].sum()").get();
+        assertSearchResponse(rsp);
+        hits = rsp.getHits();
+        assertEquals(3, hits.getTotalHits());
+        assertEquals(7.5, hits.getAt(0).field("foo").getValue());
+        assertEquals(5.0, hits.getAt(1).field("foo").getValue());
+        assertEquals(6.0, hits.getAt(2).field("foo").getValue());
+
+        // avg: 2.5 + 1.8, 5.0 + 3.0, 1.5 + 4.0
+        rsp = buildRequest("doc['double0'].avg() + doc['double1'].avg()").get();
+        assertSearchResponse(rsp);
+        hits = rsp.getHits();
+        assertEquals(3, hits.getTotalHits());
+        assertEquals(4.3, hits.getAt(0).field("foo").getValue());
+        assertEquals(8.0, hits.getAt(1).field("foo").getValue());
+        assertEquals(5.5, hits.getAt(2).field("foo").getValue());
+
+        // median: middle value for odd counts; mean of the two middle values (1.0, 1.5) for doc 3
+        rsp = buildRequest("doc['double0'].median()").get();
+        assertSearchResponse(rsp);
+        hits = rsp.getHits();
+        assertEquals(3, hits.getTotalHits());
+        assertEquals(1.5, hits.getAt(0).field("foo").getValue());
+        assertEquals(5.0, hits.getAt(1).field("foo").getValue());
+        assertEquals(1.25, hits.getAt(2).field("foo").getValue());
+
+        // min of double0 per document
+        rsp = buildRequest("doc['double0'].min()").get();
+        assertSearchResponse(rsp);
+        hits = rsp.getHits();
+        assertEquals(3, hits.getTotalHits());
+        assertEquals(1.0, hits.getAt(0).field("foo").getValue());
+        assertEquals(5.0, hits.getAt(1).field("foo").getValue());
+        assertEquals(-1.5, hits.getAt(2).field("foo").getValue());
+
+        // max of double0 per document
+        rsp = buildRequest("doc['double0'].max()").get();
+        assertSearchResponse(rsp);
+        hits = rsp.getHits();
+        assertEquals(3, hits.getTotalHits());
+        assertEquals(5.0, hits.getAt(0).field("foo").getValue());
+        assertEquals(5.0, hits.getAt(1).field("foo").getValue());
+        assertEquals(5.0, hits.getAt(2).field("foo").getValue());
+
+        // methods can be combined in one expression: sum / count = 7.5 / 3, 5.0 / 1, 6.0 / 4
+        rsp = buildRequest("doc['double0'].sum()/doc['double0'].count()").get();
+        assertSearchResponse(rsp);
+        hits = rsp.getHits();
+        assertEquals(3, hits.getTotalHits());
+        assertEquals(2.5, hits.getAt(0).field("foo").getValue());
+        assertEquals(5.0, hits.getAt(1).field("foo").getValue());
+        assertEquals(1.5, hits.getAt(2).field("foo").getValue());
+    }
+
     public void testInvalidDateMethodCall() throws Exception {
         ElasticsearchAssertions.assertAcked(prepareCreate("test").addMapping("doc", "double", "type=double"));
         ensureGreen("test");

+ 14 - 0
src/test/java/org/elasticsearch/search/MultiValueModeTests.java

@@ -32,6 +32,8 @@ import org.elasticsearch.test.ElasticsearchTestCase;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 
 public class MultiValueModeTests extends ElasticsearchTestCase {
 
@@ -122,6 +124,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase {
     private void verify(SortedNumericDocValues values, int maxDoc) {
         for (long missingValue : new long[] { 0, randomLong() }) {
             for (MultiValueMode mode : MultiValueMode.values()) {
+                if (MultiValueMode.MEDIAN.equals(mode)) {
+                    continue;
+                }
                 final NumericDocValues selected = mode.select(values, missingValue);
                 for (int i = 0; i < maxDoc; ++i) {
                     final long actual = selected.get(i);
@@ -147,6 +152,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase {
     private void verify(SortedNumericDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException {
         for (long missingValue : new long[] { 0, randomLong() }) {
             for (MultiValueMode mode : MultiValueMode.values()) {
+                if (MultiValueMode.MEDIAN.equals(mode)) {
+                    continue;
+                }
                 final NumericDocValues selected = mode.select(values, missingValue, rootDocs, new BitDocIdSet(innerDocs), maxDoc);
                 int prevRoot = -1;
                 for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {
@@ -239,6 +247,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase {
     private void verify(SortedNumericDoubleValues values, int maxDoc) {
         for (long missingValue : new long[] { 0, randomLong() }) {
             for (MultiValueMode mode : MultiValueMode.values()) {
+                if (MultiValueMode.MEDIAN.equals(mode)) {
+                    continue;
+                }
                 final NumericDoubleValues selected = mode.select(values, missingValue);
                 for (int i = 0; i < maxDoc; ++i) {
                     final double actual = selected.get(i);
@@ -264,6 +275,9 @@ public class MultiValueModeTests extends ElasticsearchTestCase {
     private void verify(SortedNumericDoubleValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException {
         for (long missingValue : new long[] { 0, randomLong() }) {
             for (MultiValueMode mode : MultiValueMode.values()) {
+                if (MultiValueMode.MEDIAN.equals(mode)) {
+                    continue;
+                }
                 final NumericDoubleValues selected = mode.select(values, missingValue, rootDocs, new BitDocIdSet(innerDocs), maxDoc);
                 int prevRoot = -1;
                 for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {