فهرست منبع

Move test of custom sig heuristic to plugin (#50891)

This moves the testing of custom significance heuristic plugins from an
`ESIntegTestCase` to an example plugin. This is *much* more "real" and
can be used as an example for anyone that needs to actually build such a
plugin. The old test had testing concerns and the example all jumbled
together.
Nik Everett 5 سال پیش
والد
کامیت
2762375dc4

+ 28 - 0
plugins/examples/custom-significance-heuristic/build.gradle

@@ -0,0 +1,28 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// Apply the Elasticsearch build plugins this example relies on; the `esplugin`
+// block configured below is presumably supplied by 'elasticsearch.esplugin'.
+apply plugin: 'elasticsearch.testclusters'
+apply plugin: 'elasticsearch.esplugin'
+
+// Plugin descriptor: name, description, entry-point class, and the license and
+// notice files, both resolved from the root project.
+esplugin {
+  name 'custom-significance-heuristic'
+  description 'An example plugin showing how to write and register a custom significance heuristic'
+  classname 'org.elasticsearch.example.customsigheuristic.CustomSignificanceHeuristicPlugin'
+  licenseFile rootProject.file('licenses/APACHE-LICENSE-2.0.txt')
+  noticeFile rootProject.file('NOTICE.txt')
+}

+ 38 - 0
plugins/examples/custom-significance-heuristic/src/main/java/org/elasticsearch/example/customsigheuristic/CustomSignificanceHeuristicPlugin.java

@@ -0,0 +1,38 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.example.customsigheuristic;
+
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.plugins.SearchPlugin;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
+
+import java.util.List;
+
+import static java.util.Collections.singletonList;
+
+/**
+ * Example {@link SearchPlugin} that registers {@link SimpleHeuristic} as a custom
+ * {@link SignificanceHeuristic}.
+ */
+public class CustomSignificanceHeuristicPlugin extends Plugin implements SearchPlugin {
+    /**
+     * Returns the single heuristic this plugin contributes, described by its name,
+     * its stream-reading constructor and its XContent parser.
+     */
+    @Override
+    public List<SignificanceHeuristicSpec<?>> getSignificanceHeuristics() {
+        SignificanceHeuristicSpec<SimpleHeuristic> simpleSpec =
+            new SignificanceHeuristicSpec<>(SimpleHeuristic.NAME, SimpleHeuristic::new, SimpleHeuristic.PARSER);
+        return singletonList(simpleSpec);
+    }
+}

+ 90 - 0
plugins/examples/custom-significance-heuristic/src/main/java/org/elasticsearch/example/customsigheuristic/SimpleHeuristic.java

@@ -0,0 +1,90 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.example.customsigheuristic;
+
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
+
+import java.io.IOException;
+
+/**
+ * A simple {@linkplain SignificanceHeuristic} used an example of declaring a custom heuristic.
+ */
+public class SimpleHeuristic extends SignificanceHeuristic {
+    public static final String NAME = "simple";
+    public static final ObjectParser<SimpleHeuristic, Void> PARSER = new ObjectParser<>(NAME, SimpleHeuristic::new);
+
+    public SimpleHeuristic() {
+    }
+
+    /**
+     * Read from a stream.
+     */
+    public SimpleHeuristic(StreamInput in) throws IOException {
+        // Nothing to read
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        // Nothing to write
+    }
+
+    @Override
+    public String getWriteableName() {
+        return NAME;
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject(NAME).endObject();
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return 1;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null) {
+            return false;
+        }
+        if (getClass() != obj.getClass()) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * @param subsetFreq   The frequency of the term in the selected sample
+     * @param subsetSize   The size of the selected sample (typically number of docs)
+     * @param supersetFreq The frequency of the term in the superset from which the sample was taken
+     * @param supersetSize The size of the superset from which the sample was taken  (typically number of docs)
+     * @return a "significance" score
+     */
+    @Override
+    public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) {
+        return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0;
+    }
+}

+ 36 - 0
plugins/examples/custom-significance-heuristic/src/test/java/org/elasticsearch/example/customsigheuristic/CustomSignificanceHeuristicClientYamlTestSuiteIT.java

@@ -0,0 +1,36 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.example.customsigheuristic;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
+import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
+
+/**
+ * YAML REST test suite for the custom significance heuristic example plugin.
+ */
+public class CustomSignificanceHeuristicClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
+    /**
+     * @param testCandidate the YAML test candidate, passed straight to the superclass
+     */
+    public CustomSignificanceHeuristicClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    /**
+     * Parameters factory that delegates to the inherited {@code createParameters()}.
+     */
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws Exception {
+        return createParameters();
+    }
+}

+ 57 - 0
plugins/examples/custom-significance-heuristic/src/test/java/org/elasticsearch/example/customsigheuristic/SimpleHeuristicWireTests.java

@@ -0,0 +1,57 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.example.customsigheuristic;
+
+import org.elasticsearch.common.io.stream.Writeable.Reader;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentParser.Token;
+import org.elasticsearch.test.AbstractSerializingTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Serialization tests for {@link SimpleHeuristic}, built on {@link AbstractSerializingTestCase}.
+ */
+public class SimpleHeuristicWireTests extends AbstractSerializingTestCase<SimpleHeuristic> {
+    @Override
+    protected SimpleHeuristic createTestInstance() {
+        return new SimpleHeuristic();
+    }
+
+    @Override
+    protected Reader<SimpleHeuristic> instanceReader() {
+        return SimpleHeuristic::new;
+    }
+
+    @Override
+    protected SimpleHeuristic doParseInstance(XContentParser parser) throws IOException {
+        // Heuristics are XContent "fragments", so step over the enclosing object
+        // start and the "simple" field name before handing over to the parser.
+        assertThat(parser.nextToken(), equalTo(Token.START_OBJECT));
+        assertThat(parser.nextToken(), equalTo(Token.FIELD_NAME));
+        assertThat(parser.currentName(), equalTo("simple"));
+        parser.nextToken();
+        SimpleHeuristic parsed = SimpleHeuristic.PARSER.apply(parser, null);
+        assertThat(parser.currentToken(), equalTo(Token.END_OBJECT));
+        // Advance past the END_OBJECT the parser stopped on.
+        parser.nextToken();
+        return parsed;
+    }
+}

+ 16 - 0
plugins/examples/custom-significance-heuristic/src/test/resources/rest-api-spec/test/custom-significance-heuristic/10_basic.yml

@@ -0,0 +1,16 @@
+# tests that the custom significance heuristic plugin is installed
+---
+"plugin loaded":
+    - skip:
+        reason: "contains is a newly added assertion"
+        features: contains
+
+    # Get master node id
+    - do:
+        cluster.state: {}
+    - set: { master_node: master }
+
+    # Fetch node info and check that the master node's plugin list contains this plugin
+    - do:
+        nodes.info: {}
+
+    - contains: { nodes.$master.plugins: { name: custom-significance-heuristic } }

+ 121 - 0
plugins/examples/custom-significance-heuristic/src/test/resources/rest-api-spec/test/custom-significance-heuristic/20_custom_heuristic.yml

@@ -0,0 +1,121 @@
+# Index seven documents tagged with a "class" of 0 or 1. Class 1 (docs 1, 2, 5)
+# always contains "foo" / 1; class 0 (docs 3, 4, 6, 7) always contains "bar" / 0.
+# Docs 5 and 6 contain both values.
+setup:
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              text:
+                type: text
+                fielddata: true
+              long:
+                type: long
+  - do:
+      bulk:
+        refresh: true
+        index: test
+        body:
+          - '{"index": {"_id": "1"}}'
+          - '{"text": "foo", "long": 1, "class": 1}'
+          - '{"index": {"_id": "2"}}'
+          - '{"text": "foo", "long": 1, "class": 1}'
+          - '{"index": {"_id": "3"}}'
+          - '{"text": "bar", "long": 0, "class": 0}'
+          - '{"index": {"_id": "4"}}'
+          - '{"text": "bar", "long": 0, "class": 0}'
+          - '{"index": {"_id": "5"}}'
+          - '{"text": ["foo", "bar"], "long": [1, 0], "class": 1}'
+          - '{"index": {"_id": "6"}}'
+          - '{"text": ["foo", "bar"], "long": [1, 0], "class": 0}'
+          - '{"index": {"_id": "7"}}'
+          - '{"text": "bar", "long": 0, "class": 0}'
+
+---
+# Runs significant_text on the "text" field with the "simple" heuristic under a
+# terms aggregation on "class"; expects "bar" to score 2.0 in class 0 and "foo"
+# to score 2.0 in class 1, with the other term scoring 1.0.
+"test custom heuristic on significant_text":
+  - do:
+      search:
+        index: test
+        size: 0
+        body:
+          aggs:
+            class:
+              terms:
+                field: class
+                order: { _key: asc }
+              aggs:
+                sig:
+                  significant_text:
+                    field: text
+                    simple: {}
+                    min_doc_count: 1
+
+  - match: { aggregations.class.buckets.0.key: 0 }
+  - match: { aggregations.class.buckets.0.sig.buckets.0.key: bar }
+  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
+  - match: { aggregations.class.buckets.0.sig.buckets.1.key: foo }
+  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
+  - match: { aggregations.class.buckets.1.key: 1 }
+  - match: { aggregations.class.buckets.1.sig.buckets.0.key: foo }
+  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
+  - match: { aggregations.class.buckets.1.sig.buckets.1.key: bar }
+  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }
+
+---
+# Same expectations as the significant_text test, but using significant_terms on
+# the "text" field (which the setup maps with fielddata enabled).
+"test custom heuristic on text significant_terms":
+  - do:
+      search:
+        index: test
+        size: 0
+        body:
+          aggs:
+            class:
+              terms:
+                field: class
+                order: { _key: asc }
+              aggs:
+                sig:
+                  significant_terms:
+                    field: text
+                    simple: {}
+                    min_doc_count: 1
+
+  - match: { aggregations.class.buckets.0.key: 0 }
+  - match: { aggregations.class.buckets.0.sig.buckets.0.key: bar }
+  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
+  - match: { aggregations.class.buckets.0.sig.buckets.1.key: foo }
+  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
+  - match: { aggregations.class.buckets.1.key: 1 }
+  - match: { aggregations.class.buckets.1.sig.buckets.0.key: foo }
+  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
+  - match: { aggregations.class.buckets.1.sig.buckets.1.key: bar }
+  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }
+
+---
+# Runs significant_terms on the numeric "long" field; expects value 0 to score
+# 2.0 in class 0 and value 1 to score 2.0 in class 1, with the other value
+# scoring 1.0.
+"test custom heuristic on long significant_terms":
+  - do:
+      search:
+        index: test
+        size: 0
+        body:
+          aggs:
+            class:
+              terms:
+                field: class
+                order: { _key: asc }
+              aggs:
+                sig:
+                  significant_terms:
+                    field: long
+                    simple: {}
+                    min_doc_count: 1
+
+  - match: { aggregations.class.buckets.0.key: 0 }
+  - match: { aggregations.class.buckets.0.sig.buckets.0.key: 0 }
+  - match: { aggregations.class.buckets.0.sig.buckets.0.score: 2.0 }
+  - match: { aggregations.class.buckets.0.sig.buckets.1.key: 1 }
+  - match: { aggregations.class.buckets.0.sig.buckets.1.score: 1.0 }
+  - match: { aggregations.class.buckets.1.key: 1 }
+  - match: { aggregations.class.buckets.1.sig.buckets.0.key: 1 }
+  - match: { aggregations.class.buckets.1.sig.buckets.0.score: 2.0 }
+  - match: { aggregations.class.buckets.1.sig.buckets.1.key: 0 }
+  - match: { aggregations.class.buckets.1.sig.buckets.1.score: 1.0 }

+ 2 - 143
server/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java

@@ -22,10 +22,7 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.xcontent.ObjectParser;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
@@ -65,7 +62,6 @@ import java.util.Map;
 import java.util.concurrent.ExecutionException;
 import java.util.function.Function;
 
-import static java.util.Collections.singletonList;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.filter;
@@ -88,92 +84,14 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
 
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
-        return Arrays.asList(CustomSignificanceHeuristicPlugin.class);
+        return Arrays.asList(TestScriptPlugin.class);
     }
 
     public String randomExecutionHint() {
         return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString();
     }
 
-    public void testPlugin() throws Exception {
-        String type = randomBoolean() ? "text" : "long";
-        String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
-        SharedSignificantTermsTestMethods.index01Docs(type, settings, this);
-        SearchRequestBuilder request;
-        if ("text".equals(type) && randomBoolean()) {
-            // Use significant_text on text fields but occasionally run with alternative of
-            // significant_terms on legacy fieldData=true too.
-            request = client().prepareSearch(INDEX_NAME)
-                    .addAggregation(
-                            terms("class")
-                            .field(CLASS_FIELD)
-                                    .subAggregation((significantText("sig_terms", TEXT_FIELD))
-                                    .significanceHeuristic(new SimpleHeuristic())
-                                    .minDocCount(1)
-                            )
-                    );
-        }else
-        {
-            request = client().prepareSearch(INDEX_NAME)
-                    .addAggregation(
-                            terms("class")
-                            .field(CLASS_FIELD)
-                                    .subAggregation((significantTerms("sig_terms"))
-                                    .field(TEXT_FIELD)
-                                    .significanceHeuristic(new SimpleHeuristic())
-                                    .minDocCount(1)
-                            )
-                    );
-        }
-
-        SearchResponse response = request.get();
-        assertSearchResponse(response);
-        StringTerms classes = response.getAggregations().get("class");
-        assertThat(classes.getBuckets().size(), equalTo(2));
-        for (Terms.Bucket classBucket : classes.getBuckets()) {
-            Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
-            assertTrue(aggs.containsKey("sig_terms"));
-            SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
-            assertThat(agg.getBuckets().size(), equalTo(2));
-            Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
-            SignificantTerms.Bucket sigBucket = bucketIterator.next();
-            String term = sigBucket.getKeyAsString();
-            String classTerm = classBucket.getKeyAsString();
-            assertTrue(term.equals(classTerm));
-            assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
-            sigBucket = bucketIterator.next();
-            assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
-        }
-
-        // we run the same test again but this time we do not call assertSearchResponse() before the assertions
-        // the reason is that this would trigger toXContent and we would like to check that this has no potential side effects
-
-        response = request.get();
-
-        classes = (StringTerms) response.getAggregations().get("class");
-        assertThat(classes.getBuckets().size(), equalTo(2));
-        for (Terms.Bucket classBucket : classes.getBuckets()) {
-            Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
-            assertTrue(aggs.containsKey("sig_terms"));
-            SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
-            assertThat(agg.getBuckets().size(), equalTo(2));
-            Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
-            SignificantTerms.Bucket sigBucket = bucketIterator.next();
-            String term = sigBucket.getKeyAsString();
-            String classTerm = classBucket.getKeyAsString();
-            assertTrue(term.equals(classTerm));
-            assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
-            sigBucket = bucketIterator.next();
-            assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
-        }
-    }
-
-    public static class CustomSignificanceHeuristicPlugin extends MockScriptPlugin implements SearchPlugin {
-        @Override
-        public List<SignificanceHeuristicSpec<?>> getSignificanceHeuristics() {
-            return singletonList(new SignificanceHeuristicSpec<>(SimpleHeuristic.NAME, SimpleHeuristic::new, SimpleHeuristic.PARSER));
-        }
-
+    public static class TestScriptPlugin extends MockScriptPlugin implements SearchPlugin {
         @Override
         public Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
             Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
@@ -203,65 +121,6 @@ public class SignificantTermsSignificanceScoreIT extends ESIntegTestCase {
         }
     }
 
-    public static class SimpleHeuristic extends SignificanceHeuristic {
-        public static final String NAME = "simple";
-        public static final ObjectParser<SimpleHeuristic, Void> PARSER = new ObjectParser<>(NAME, SimpleHeuristic::new);
-
-        public SimpleHeuristic() {
-        }
-
-        /**
-         * Read from a stream.
-         */
-        public SimpleHeuristic(StreamInput in) throws IOException {
-            // Nothing to read
-        }
-
-        @Override
-        public void writeTo(StreamOutput out) throws IOException {
-            // Nothing to write
-        }
-
-        @Override
-        public String getWriteableName() {
-            return NAME;
-        }
-
-        @Override
-        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
-            builder.startObject(NAME).endObject();
-            return builder;
-        }
-
-        @Override
-        public int hashCode() {
-            return 1;
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            if (obj == null) {
-                return false;
-            }
-            if (getClass() != obj.getClass()) {
-                return false;
-            }
-            return true;
-        }
-
-        /**
-         * @param subsetFreq   The frequency of the term in the selected sample
-         * @param subsetSize   The size of the selected sample (typically number of docs)
-         * @param supersetFreq The frequency of the term in the superset from which the sample was taken
-         * @param supersetSize The size of the superset from which the sample was taken  (typically number of docs)
-         * @return a "significance" score
-         */
-        @Override
-        public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) {
-            return subsetFreq / subsetSize > supersetFreq / supersetSize ? 2.0 : 1.0;
-        }
-    }
-
     public void testXContentResponse() throws Exception {
         String type = randomBoolean() ? "text" : "long";
         String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";