Browse Source

SignificantText aggregation had include/exclude logic back to front (#64520)

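The SignificantText aggregation applied its include/exclude filter back to front:
terms accepted by the filter were skipped instead of kept. The check is now negated
so that only terms rejected by the filter are skipped.
Added a test covering both an include list and an exclude list.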

Closes #64519
markharwood 5 years ago
parent
commit
b554f23239

+ 1 - 1
server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java

@@ -263,7 +263,7 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
                             scratch.clear();
                             scratch.copyChars(termAtt);
                             BytesRef bytes = scratch.get();
-                            if (includeExclude != null && includeExclude.accept(bytes)) {
+                            if (includeExclude != null && false == includeExclude.accept(bytes)) {
                                 continue;
                             }
                             if (inDocTerms.add(bytes) < 0) {

+ 59 - 0
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java

@@ -141,6 +141,65 @@ public class SignificantTextAggregatorTests extends AggregatorTestCase {
         }
     }
 
+    /**
+     * Uses the significant text aggregation to find keywords in a text field and checks that include and exclude term lists are honoured
+     */
+    public void testIncludeExcludes() throws IOException {
+        TextFieldType textFieldType = new TextFieldType("text");
+        textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
+        
+        IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
+        indexWriterConfig.setMaxBufferedDocs(100);
+        indexWriterConfig.setRAMBufferSizeMB(100); // buffer limits chosen so the flush happens on open, giving the single segment asserted below
+        try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
+            indexDocuments(w);
+
+            String [] incExcValues = {"duplicate"}; // the one term used first as the include list, then as the exclude list
+
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                assertEquals("test expects a single segment", 1, reader.leaves().size());
+                IndexSearcher searcher = new IndexSearcher(reader);
+                
+                // Include list only: "duplicate" is the sole term allowed through, so "even" must be absent
+                {
+                    SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text").
+                        includeExclude(new IncludeExclude(incExcValues, null));
+                    SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
+                        .subAggregation(sigAgg);
+                    if(randomBoolean()){ // randomly exercise the sourceFieldNames option as well
+                        sigAgg.sourceFieldNames(Arrays.asList(new String [] {"json_only_field"}));
+                    }
+                    // Search for docs containing "even", which should also contain the term "duplicate"
+                    InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
+                    SignificantTerms terms = sampler.getAggregations().get("sig_text");
+
+                    assertNull(terms.getBucketByKey("even"));
+                    assertNotNull(terms.getBucketByKey("duplicate"));
+                    assertTrue(AggregationInspectionHelper.hasValue(sampler));
+                    
+                }
+                // Exclude list only: "duplicate" must be filtered out while "even" is still returned
+                {
+                    SignificantTextAggregationBuilder sigAgg = new SignificantTextAggregationBuilder("sig_text", "text").
+                        includeExclude(new IncludeExclude(null, incExcValues));
+                    SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
+                        .subAggregation(sigAgg);
+                    if(randomBoolean()){ // randomly exercise the sourceFieldNames option as well
+                        sigAgg.sourceFieldNames(Arrays.asList(new String [] {"json_only_field"}));
+                    }
+                    // Search for docs containing "even", which should also contain the term "duplicate"
+                    InternalSampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "even")), aggBuilder, textFieldType);
+                    SignificantTerms terms = sampler.getAggregations().get("sig_text");
+
+                    assertNotNull(terms.getBucketByKey("even"));
+                    assertNull(terms.getBucketByKey("duplicate"));                    
+                    assertTrue(AggregationInspectionHelper.hasValue(sampler));
+                    
+                }
+            }
+        }
+    }    
+    
     
     public void testMissingField() throws IOException {
         TextFieldType textFieldType = new TextFieldType("text");