Jelajahi Sumber

Fix analyzer alias processing (#19506)

In the lack of tests the analyzer.alias feature was pretty much not working
at all on current master. Issues like #19163 showed some serious problems for users
using this feature upgrading to an alpha version.
This change fixes the processing order and allows aliases to be set for
existing analyzers like `default`. This change also ensures that if `default`
is aliased the correct analyzer is used for `default_search` etc.

Closes #19163
Simon Willnauer 9 tahun lalu
induk
melakukan
302c7a521a

+ 77 - 57
core/src/main/java/org/elasticsearch/index/analysis/AnalysisService.java

@@ -28,8 +28,11 @@ import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.core.TextFieldMapper;
 
 import java.io.Closeable;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 import static java.util.Collections.unmodifiableMap;
 
@@ -58,69 +61,34 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
         this.tokenFilters = unmodifiableMap(tokenFilterFactoryFactories);
         analyzerProviders = new HashMap<>(analyzerProviders);
 
-        if (!analyzerProviders.containsKey("default")) {
-            analyzerProviders.put("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS));
-        }
-        if (!analyzerProviders.containsKey("default_search")) {
-            analyzerProviders.put("default_search", analyzerProviders.get("default"));
-        }
-        if (!analyzerProviders.containsKey("default_search_quoted")) {
-            analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search"));
-        }
-
+        Map<String, NamedAnalyzer> analyzerAliases = new HashMap<>();
         Map<String, NamedAnalyzer> analyzers = new HashMap<>();
         for (Map.Entry<String, AnalyzerProvider<?>> entry : analyzerProviders.entrySet()) {
-            AnalyzerProvider<?> analyzerFactory = entry.getValue();
-            String name = entry.getKey();
-            /*
-             * Lucene defaults positionIncrementGap to 0 in all analyzers but
-             * Elasticsearch defaults them to 0 only before version 2.0
-             * and 100 afterwards so we override the positionIncrementGap if it
-             * doesn't match here.
-             */
-            int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
-            if (analyzerFactory instanceof CustomAnalyzerProvider) {
-                ((CustomAnalyzerProvider) analyzerFactory).build(this);
-                /*
-                 * Custom analyzers already default to the correct, version
-                 * dependent positionIncrementGap and the user is be able to
-                 * configure the positionIncrementGap directly on the analyzer so
-                 * we disable overriding the positionIncrementGap to preserve the
-                 * user's setting.
-                 */
-                overridePositionIncrementGap = Integer.MIN_VALUE;
-            }
-            Analyzer analyzerF = analyzerFactory.get();
-            if (analyzerF == null) {
-                throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
-            }
-            NamedAnalyzer analyzer;
-            if (analyzerF instanceof NamedAnalyzer) {
-                // if we got a named analyzer back, use it...
-                analyzer = (NamedAnalyzer) analyzerF;
-                if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
-                    // unless the positionIncrementGap needs to be overridden
-                    analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
-                }
+            processAnalyzerFactory(entry.getKey(), entry.getValue(), analyzerAliases, analyzers);
+        }
+        for (Map.Entry<String, NamedAnalyzer> entry : analyzerAliases.entrySet()) {
+            String key = entry.getKey();
+            if (analyzers.containsKey(key) &&
+                ("default".equals(key) || "default_search".equals(key) || "default_search_quoted".equals(key)) == false) {
+                throw new IllegalStateException("already registered analyzer with name: " + key);
             } else {
-                analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
-            }
-            if (analyzers.containsKey(name)) {
-                throw new IllegalStateException("already registered analyzer with name: " + name);
-            }
-            analyzers.put(name, analyzer);
-            String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
-            if (strAliases != null) {
-                for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) {
-                    analyzers.put(alias, analyzer);
-                }
-            }
-            String[] aliases = this.indexSettings.getSettings().getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
-            for (String alias : aliases) {
-                analyzers.put(alias, analyzer);
+                NamedAnalyzer configured = entry.getValue();
+                analyzers.put(key, configured);
             }
         }
 
+        if (!analyzers.containsKey("default")) {
+            processAnalyzerFactory("default", new StandardAnalyzerProvider(indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS),
+                analyzerAliases, analyzers);
+        }
+        if (!analyzers.containsKey("default_search")) {
+            analyzers.put("default_search", analyzers.get("default"));
+        }
+        if (!analyzers.containsKey("default_search_quoted")) {
+            analyzers.put("default_search_quoted", analyzers.get("default_search"));
+        }
+
+
         NamedAnalyzer defaultAnalyzer = analyzers.get("default");
         if (defaultAnalyzer == null) {
             throw new IllegalArgumentException("no default analyzer configured");
@@ -145,6 +113,58 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
         this.analyzers = unmodifiableMap(analyzers);
     }
 
+    private void processAnalyzerFactory(String name, AnalyzerProvider<?> analyzerFactory, Map<String, NamedAnalyzer> analyzerAliases, Map<String, NamedAnalyzer> analyzers) {
+        /*
+         * Lucene defaults positionIncrementGap to 0 in all analyzers but
+         * Elasticsearch defaults them to 0 only before version 2.0
+         * and 100 afterwards so we override the positionIncrementGap if it
+         * doesn't match here.
+         */
+        int overridePositionIncrementGap = TextFieldMapper.Defaults.POSITION_INCREMENT_GAP;
+        if (analyzerFactory instanceof CustomAnalyzerProvider) {
+            ((CustomAnalyzerProvider) analyzerFactory).build(this);
+            /*
+             * Custom analyzers already default to the correct, version
+             * dependent positionIncrementGap and the user is be able to
+             * configure the positionIncrementGap directly on the analyzer so
+             * we disable overriding the positionIncrementGap to preserve the
+             * user's setting.
+             */
+            overridePositionIncrementGap = Integer.MIN_VALUE;
+        }
+        Analyzer analyzerF = analyzerFactory.get();
+        if (analyzerF == null) {
+            throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
+        }
+        NamedAnalyzer analyzer;
+        if (analyzerF instanceof NamedAnalyzer) {
+            // if we got a named analyzer back, use it...
+            analyzer = (NamedAnalyzer) analyzerF;
+            if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) {
+                // unless the positionIncrementGap needs to be overridden
+                analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap);
+            }
+        } else {
+            analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
+        }
+        if (analyzers.containsKey(name)) {
+            throw new IllegalStateException("already registered analyzer with name: " + name);
+        }
+        analyzers.put(name, analyzer);
+        String strAliases = this.indexSettings.getSettings().get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");
+        Set<String> aliases = new HashSet<>();
+        if (strAliases != null) {
+            aliases.addAll(Strings.commaDelimitedListToSet(strAliases));
+        }
+        aliases.addAll(Arrays.asList(this.indexSettings.getSettings()
+            .getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias")));
+        for (String alias : aliases) {
+            if (analyzerAliases.putIfAbsent(alias, analyzer) != null) {
+                throw new IllegalStateException("alias [" + alias + "] is already used by [" + analyzerAliases.get(alias).name() + "]");
+            }
+        }
+    }
+
     @Override
     public void close() {
         for (NamedAnalyzer analyzer : analyzers.values()) {

+ 2 - 2
core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

@@ -435,9 +435,9 @@ public abstract class FieldMapper extends Mapper implements Cloneable {
             boolean hasDifferentSearchQuoteAnalyzer = fieldType().searchAnalyzer().name().equals(fieldType().searchQuoteAnalyzer().name()) == false;
             if (includeDefaults || hasDefaultIndexAnalyzer == false || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
                 builder.field("analyzer", fieldType().indexAnalyzer().name());
-                if (hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
+                if (includeDefaults || hasDifferentSearchAnalyzer || hasDifferentSearchQuoteAnalyzer) {
                     builder.field("search_analyzer", fieldType().searchAnalyzer().name());
-                    if (hasDifferentSearchQuoteAnalyzer) {
+                    if (includeDefaults || hasDifferentSearchQuoteAnalyzer) {
                         builder.field("search_quote_analyzer", fieldType().searchQuoteAnalyzer().name());
                     }
                 }

+ 42 - 0
core/src/test/java/org/elasticsearch/index/mapper/core/TextFieldMapperTests.java

@@ -29,6 +29,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.IndexService;
@@ -44,6 +45,7 @@ import org.elasticsearch.test.ESSingleNodeTestCase;
 import org.junit.Before;
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
@@ -284,6 +286,46 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
 
         mapper = parser.parse("type", new CompressedXContent(mapping));
         assertEquals(mapping,  mapper.mappingSource().toString());
+
+        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "text")
+            .field("analyzer", "keyword")
+            .endObject()
+            .endObject().endObject().endObject().string();
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        // special case: default search analyzer
+        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "text")
+            .field("analyzer", "keyword")
+            .field("search_analyzer", "default")
+            .endObject()
+            .endObject().endObject().endObject().string();
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "text")
+            .field("analyzer", "keyword")
+            .endObject()
+            .endObject().endObject().endObject().string();
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        XContentBuilder builder = XContentFactory.jsonBuilder();
+
+        mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
+        String mappingString = builder.string();
+        assertTrue(mappingString.contains("analyzer"));
+        assertTrue(mappingString.contains("search_analyzer"));
+        assertTrue(mappingString.contains("search_quote_analyzer"));
     }
 
     public void testSearchQuoteAnalyzerSerialization() throws IOException {

+ 43 - 0
core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java

@@ -53,6 +53,7 @@ import org.junit.Before;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Map;
 
 import static java.util.Collections.emptyMap;
@@ -301,6 +302,48 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
 
         mapper = parser.parse("type", new CompressedXContent(mapping));
         assertEquals(mapping,  mapper.mappingSource().toString());
+
+        // special case: default search analyzer
+        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "string")
+            .field("analyzer", "keyword")
+            .endObject()
+            .endObject().endObject().endObject().string();
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+        // special case: default search analyzer
+        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "string")
+            .field("analyzer", "keyword")
+            .field("search_analyzer", "default")
+            .endObject()
+            .endObject().endObject().endObject().string();
+
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping,  mapper.mappingSource().toString());
+
+
+        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field")
+            .field("type", "string")
+            .field("analyzer", "keyword")
+            .endObject()
+            .endObject().endObject().endObject().string();
+        mapper = parser.parse("type", new CompressedXContent(mapping));
+        XContentBuilder builder = XContentFactory.jsonBuilder();
+
+        mapper.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true")));
+        String mappingString = builder.string();
+        assertTrue(mappingString.contains("analyzer"));
+        assertTrue(mappingString.contains("search_analyzer"));
+        assertTrue(mappingString.contains("search_quote_analyzer"));
     }
 
     private Map<String, Object> getSerializedMap(String fieldName, DocumentMapper mapper) throws Exception {

+ 40 - 7
core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java

@@ -25,6 +25,8 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
 import org.apache.lucene.analysis.hunspell.Dictionary;
 import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
@@ -52,6 +54,7 @@ import org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.test.IndexSettingsModule;
+import org.elasticsearch.test.VersionUtils;
 import org.hamcrest.MatcherAssert;
 
 import java.io.BufferedWriter;
@@ -126,29 +129,59 @@ public class AnalysisModuleTests extends ModuleTestCase {
         Settings settings = Settings.builder()
             .put("index.analysis.analyzer.foobar.alias","default")
             .put("index.analysis.analyzer.foobar.type", "keyword")
+            .put("index.analysis.analyzer.foobar_search.alias","default_search")
+            .put("index.analysis.analyzer.foobar_search.type","english")
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
-            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
             .build();
         AnalysisRegistry newRegistry = getNewRegistry(settings);
         AnalysisService as = getAnalysisService(newRegistry, settings);
         assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
+        assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(EnglishAnalyzer.class)));
+    }
 
+    public void testAnalyzerAliasReferencesAlias() throws IOException {
+        Settings settings = Settings.builder()
+            .put("index.analysis.analyzer.foobar.alias","default")
+            .put("index.analysis.analyzer.foobar.type", "german")
+            .put("index.analysis.analyzer.foobar_search.alias","default_search")
+            .put("index.analysis.analyzer.foobar_search.type", "default")
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
+            .build();
+        AnalysisRegistry newRegistry = getNewRegistry(settings);
+        AnalysisService as = getAnalysisService(newRegistry, settings);
+        assertThat(as.analyzer("default").analyzer(), is(instanceOf(GermanAnalyzer.class)));
+        // analyzer types are bound early before we resolve aliases
+        assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(StandardAnalyzer.class)));
     }
 
-    public void testDoubleAlias() throws IOException {
+    public void testAnalyzerAliasDefault() throws IOException {
         Settings settings = Settings.builder()
             .put("index.analysis.analyzer.foobar.alias","default")
             .put("index.analysis.analyzer.foobar.type", "keyword")
-            .put("index.analysis.analyzer.barfoo.alias","default")
-            .put("index.analysis.analyzer.barfoo.type","english")
             .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
-            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
             .build();
         AnalysisRegistry newRegistry = getNewRegistry(settings);
-        String message = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings)).getMessage();
-        assertEquals("already registered analyzer with name: default", message);
+        AnalysisService as = getAnalysisService(newRegistry, settings);
+        assertThat(as.analyzer("default").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
+        assertThat(as.analyzer("default_search").analyzer(), is(instanceOf(KeywordAnalyzer.class)));
     }
 
+    public void testAnalyzerAliasMoreThanOnce() throws IOException {
+        Settings settings = Settings.builder()
+            .put("index.analysis.analyzer.foobar.alias","default")
+            .put("index.analysis.analyzer.foobar.type", "keyword")
+            .put("index.analysis.analyzer.foobar1.alias","default")
+            .put("index.analysis.analyzer.foobar1.type", "english")
+            .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random()))
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .build();
+        AnalysisRegistry newRegistry = getNewRegistry(settings);
+        IllegalStateException ise = expectThrows(IllegalStateException.class, () -> getAnalysisService(newRegistry, settings));
+        assertEquals("alias [default] is already used by [foobar]", ise.getMessage());
+    }
     public void testVersionedAnalyzers() throws Exception {
         String yaml = "/org/elasticsearch/index/analysis/test1.yml";
         Settings settings2 = Settings.builder()

+ 3 - 0
core/src/test/java/org/elasticsearch/indices/template/SimpleIndexTemplateIT.java

@@ -32,12 +32,15 @@ import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.query.MatchQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.index.query.TermQueryBuilder;
 import org.elasticsearch.indices.IndexTemplateAlreadyExistsException;
 import org.elasticsearch.indices.InvalidAliasNameException;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.test.ESIntegTestCase;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;

+ 47 - 0
rest-api-spec/src/main/resources/rest-api-spec/test/indices.put_template/10_basic.yaml

@@ -70,4 +70,51 @@
           settings:
             number_of_shards:   1
             number_of_replicas: 0
+---
+"Put template with analyzer alias":
+
+  - do:
+      indices.put_template:
+        name: test
+        create: true
+        order: 0
+        body:
+          template: test_*
+          settings:
+            index.analysis.analyzer.foobar.alias: "default"
+            index.analysis.analyzer.foobar.type: "keyword"
+            index.analysis.analyzer.foobar_search.alias: "default_search"
+            index.analysis.analyzer.foobar_search.type: "standard"
+
+  - do:
+    index:
+      index:  test_index
+      type:   test
+      body:   { field: "the quick brown fox" }
+
+  - do:
+    indices.refresh:
+      index: test_index
+
+  - do:
+    search:
+      index: test_index
+      type:  test
+      body:
+        query:
+          term:
+            field: "the quick brown fox"
+
+  - match: {hits.total: 1}
+
+  - do:
+    search:
+      index: test_index
+      type:  test
+      body:
+        query:
+          match:
+            field: "the quick brown fox"
+
+  - match: {hits.total: 0}