瀏覽代碼

Use keyword analyzer for untokenized fields in TermVectorsService (#94518)

TermVectorsService only returns values for indexed fields. However, a field
that is indexed but untokenized may not have an index-time analyzer configured
on it (for example, the _ignored metadata field). If a request includes one
of these fields, TermVectorsService throws an exception because it can't find
the relevant analyzer.

This commit changes TermVectorsService to use a plain keyword analyzer
for these fields.
Alan Woodward 2 年之前
父節點
當前提交
b4df19509a

+ 5 - 0
docs/changelog/94518.yaml

@@ -0,0 +1,5 @@
+pr: 94518
+summary: Use keyword analyzer for untokenized fields in `TermVectorsService`
+area: Search
+type: bug
+issues: []

+ 15 - 0
server/src/internalClusterTest/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java

@@ -27,6 +27,7 @@ import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.MockKeywordPlugin;
 import org.elasticsearch.xcontent.ToXContent;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentType;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -44,6 +45,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcke
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertRequestBuilderThrows;
 import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 
@@ -1038,6 +1040,19 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase {
         assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
     }
 
+    public void testTermVectorsWithIgnoredField() throws IOException, InterruptedException {
+        // create "index" with a long field mapped with ignore_malformed=true
+        assertAcked(prepareCreate("index").setMapping("field", "type=long,ignore_malformed=true"));
+        ensureGreen();
+
+        // index a doc whose long field holds the non-numeric string "foo"
+        indexRandom(true, client().prepareIndex("index").setId("1").setSource("{\"field\":\"foo\"}", XContentType.JSON));
+
+        // request term vectors for all fields ("*"); the _ignored terms must be returned with size > 0
+        TermVectorsResponse resp = client().prepareTermVectors("index", "1").setSelectedFields("*").get();
+        assertThat(resp.getFields().terms("_ignored").size(), greaterThan(0L));
+    }
+
     public void testWithKeywordAndNormalizer() throws IOException, ExecutionException, InterruptedException {
         // setup indices
         String[] indexNames = new String[] { "with_tv", "without_tv" };

+ 2 - 1
server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java

@@ -22,6 +22,7 @@ import org.elasticsearch.action.termvectors.TermVectorsRequest;
 import org.elasticsearch.action.termvectors.TermVectorsResponse;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.document.DocumentField;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.xcontent.XContentHelper;
@@ -241,7 +242,7 @@ public class TermVectorsService {
         } else {
             return mapperService.indexAnalyzer(
                 field,
-                f -> { throw new IllegalArgumentException("No analyzer configured for field " + f); }
+                f -> Lucene.KEYWORD_ANALYZER    // if no analyzer configured it must be untokenized so return a keyword analyzer
             );
         }
     }