Browse Source

Merge pull request #19929 from johtani/fix/stop_using_cached_components_in_analyze_api

Stop using cached component in _analyze API
Jun Ohtani 9 years ago
parent
commit
8d4bc0b2a8

+ 21 - 10
core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

@@ -467,17 +467,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                     // Need to set anonymous "name" of char_filter
                     charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
                 } else {
+                    AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory;
                     if (analysisService == null) {
-                        AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
+                        charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
                         if (charFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
                         }
                         charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
                     } else {
-                        charFilterFactories[i] = analysisService.charFilter(charFilter.name);
-                        if (charFilterFactories[i] == null) {
+                        charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, analysisService.getIndexSettings());
+                        if (charFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
                         }
+                        charFilterFactories[i] = charFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, charFilter.name,
+                            AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+                                AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name));
                     }
                 }
                 if (charFilterFactories[i] == null) {
@@ -509,18 +513,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                     // Need to set anonymous "name" of tokenfilter
                     tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
                 } else {
+                    AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
                     if (analysisService == null) {
-                        AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
-
+                        tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
                         if (tokenFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
                         }
                         tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
                     } else {
-                        tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name);
-                        if (tokenFilterFactories[i] == null) {
+                        tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, analysisService.getIndexSettings());
+                        if (tokenFilterFactoryFactory == null) {
                             throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
                         }
+                        tokenFilterFactories[i] = tokenFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenFilter.name,
+                            AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+                                AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name));
                     }
                 }
                 if (tokenFilterFactories[i] == null) {
@@ -550,17 +557,21 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
             // Need to set anonymous "name" of tokenizer
             tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
         } else {
+            AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
             if (analysisService == null) {
-                AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
+                tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
                 if (tokenizerFactoryFactory == null) {
                     throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
                 }
                 tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
             } else {
-                tokenizerFactory = analysisService.tokenizer(tokenizer.name);
-                if (tokenizerFactory == null) {
+                tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, analysisService.getIndexSettings());
+                if (tokenizerFactoryFactory == null) {
                     throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
                 }
+                tokenizerFactory = tokenizerFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenizer.name,
+                    AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
+                        AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
             }
         }
         return tokenizerFactory;

+ 96 - 11
core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java

@@ -49,6 +49,9 @@ import static java.util.Collections.unmodifiableMap;
  * This class exists per node and allows to create per-index {@link AnalysisService} via {@link #build(IndexSettings)}
  */
 public final class AnalysisRegistry implements Closeable {
+    public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
+    public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
+    public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
     private final PrebuiltAnalysis prebuiltAnalysis = new PrebuiltAnalysis();
     private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();
 
@@ -70,6 +73,20 @@ public final class AnalysisRegistry implements Closeable {
         this.analyzers = unmodifiableMap(analyzers);
     }
 
+    /**
+     * Returns a {@link Settings} by groupName from {@link IndexSettings} or a default {@link Settings}
+     * @param indexSettings an index settings
+     * @param groupName tokenizer/token filter/char filter name
+     * @return {@link Settings}
+     */
+    public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
+        Settings settings = indexSettings.getSettings().getAsSettings(groupName);
+        if (settings.isEmpty()) {
+            settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build();
+        }
+        return settings;
+    }
+
     /**
      * Returns a registered {@link TokenizerFactory} provider by name or <code>null</code> if the tokenizer was not registered
      */
@@ -122,9 +139,9 @@ public final class AnalysisRegistry implements Closeable {
      * Creates an index-level {@link AnalysisService} from this registry using the given index settings
      */
     public AnalysisService build(IndexSettings indexSettings) throws IOException {
-        final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
-        final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
-        final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
+        final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
+        final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
+        final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
         final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
 
         final Map<String, CharFilterFactory> charFilterFactories = buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
@@ -136,13 +153,76 @@ public final class AnalysisRegistry implements Closeable {
          * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
          * hide internal data-structures as much as possible.
          */
-        tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, tokenizerFactories, name, settings)));
+        tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
         final Map<String, TokenFilterFactory> tokenFilterFactories = buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
         final Map<String, AnalyzerProvider<?>> analyzierFactories = buildMapping(true, "analyzer", indexSettings, analyzersSettings,
                 analyzers, prebuiltAnalysis.analyzerProviderFactories);
         return new AnalysisService(indexSettings, analyzierFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
     }
 
+    /**
+     * Returns a registered {@link TokenizerFactory} provider by {@link IndexSettings}
+     *  or a registered {@link TokenizerFactory} provider by predefined name
+     *  or <code>null</code> if the tokenizer was not registered
+     * @param tokenizer global or defined tokenizer name
+     * @param indexSettings an index settings
+     * @return {@link TokenizerFactory} provider or <code>null</code>
+     */
+    public AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
+        final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
+        if (tokenizerSettings.containsKey(tokenizer)) {
+            Settings currentSettings = tokenizerSettings.get(tokenizer);
+            return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type"));
+        } else {
+            return prebuiltAnalysis.tokenizerFactories.get(tokenizer);
+        }
+    }
+
+    /**
+     * Returns a registered {@link TokenFilterFactory} provider by {@link IndexSettings}
+     *  or a registered {@link TokenFilterFactory} provider by predefined name
+     *  or <code>null</code> if the tokenFilter was not registered
+     * @param tokenFilter global or defined tokenFilter name
+     * @param indexSettings an index settings
+     * @return {@link TokenFilterFactory} provider or <code>null</code>
+     */
+    public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
+        final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
+        if (tokenFilterSettings.containsKey(tokenFilter)) {
+            Settings currentSettings = tokenFilterSettings.get(tokenFilter);
+            String typeName = currentSettings.get("type");
+            /*
+             * synonym is different than everything else since it needs access to the tokenizer factories for this index.
+             * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
+             * hide internal data-structures as much as possible.
+             */
+            if ("synonym".equals(typeName)) {
+                return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
+            } else {
+                return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName);
+            }
+        } else {
+            return prebuiltAnalysis.tokenFilterFactories.get(tokenFilter);
+        }
+    }
+
+    /**
+     * Returns a registered {@link CharFilterFactory} provider by {@link IndexSettings}
+     *  or a registered {@link CharFilterFactory} provider by predefined name
+     *  or <code>null</code> if the charFilter was not registered
+     * @param charFilter global or defined charFilter name
+     * @param indexSettings an index settings
+     * @return {@link CharFilterFactory} provider or <code>null</code>
+     */
+    public AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
+        final Map<String, Settings> charFilterSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
+        if (charFilterSettings.containsKey(charFilter)) {
+            Settings currentSettings = charFilterSettings.get(charFilter);
+            return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type"));
+        } else {
+            return prebuiltAnalysis.charFilterFactories.get(charFilter);
+        }
+    }
 
     private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
         return new AnalysisModule.AnalysisProvider<T>() {
@@ -185,13 +265,7 @@ public final class AnalysisRegistry implements Closeable {
                 }
                 factories.put(name, factory);
             }  else {
-                if (typeName == null) {
-                    throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
-                }
-                AnalysisModule.AnalysisProvider<T> type = providerMap.get(typeName);
-                if (type == null) {
-                    throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
-                }
+                AnalysisProvider<T> type = getAnalysisProvider(toBuild, providerMap, name, typeName);
                 final T factory = type.get(settings, environment, name, currentSettings);
                 factories.put(name, factory);
             }
@@ -232,6 +306,17 @@ public final class AnalysisRegistry implements Closeable {
         return factories;
     }
 
+    private static <T> AnalysisProvider<T> getAnalysisProvider(String toBuild, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
+        if (typeName == null) {
+            throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
+        }
+        AnalysisProvider<T> type = providerMap.get(typeName);
+        if (type == null) {
+            throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
+        }
+        return type;
+    }
+
     private static class PrebuiltAnalysis implements Closeable {
 
         final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;

+ 7 - 5
core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java

@@ -32,18 +32,18 @@ import org.elasticsearch.common.io.FastStringReader;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.indices.analysis.AnalysisModule;
 
 import java.io.IOException;
 import java.io.Reader;
 import java.util.List;
-import java.util.Map;
 
 public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
 
     private final SynonymMap synonymMap;
     private final boolean ignoreCase;
 
-    public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, Map<String, TokenizerFactory> tokenizerFactories,
+    public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
                                       String name, Settings settings) throws IOException {
         super(indexSettings, name, settings);
 
@@ -65,11 +65,13 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
         boolean expand = settings.getAsBoolean("expand", true);
 
         String tokenizerName = settings.get("tokenizer", "whitespace");
-        final TokenizerFactory tokenizerFactory = tokenizerFactories.get(tokenizerName);
-        if (tokenizerFactory == null) {
+        AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
+            analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
+        if (tokenizerFactoryFactory == null) {
             throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
         }
-
+        final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
+            AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
         Analyzer analyzer = new Analyzer() {
             @Override
             protected TokenStreamComponents createComponents(String fieldName) {

+ 21 - 1
core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java

@@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
 import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
 import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -51,12 +52,21 @@ public class TransportAnalyzeActionTests extends ESTestCase {
 
         Settings indexSettings = Settings.builder()
                 .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+                .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
                 .put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
                 .put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
                 .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
                 .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
                 .put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
-                .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter").build();
+                .putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
+                .put("index.analysis.tokenizer.trigram.type", "ngram")
+                .put("index.analysis.tokenizer.trigram.min_gram", 3)
+                .put("index.analysis.tokenizer.trigram.max_gram", 3)
+                .put("index.analysis.filter.synonym.type", "synonym")
+                .putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay")
+                .put("index.analysis.filter.synonym.tokenizer", "trigram")
+                .put("index.analysis.filter.synonym.min_gram", 3)
+                .put("index.analysis.filter.synonym.max_gram", 3).build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
         environment = new Environment(settings);
         registry = new AnalysisModule(environment, emptyList()).getAnalysisRegistry();
@@ -168,6 +178,16 @@ public class TransportAnalyzeActionTests extends ESTestCase {
         assertEquals("brown", tokens.get(2).getTerm());
         assertEquals("fox", tokens.get(3).getTerm());
         assertEquals("dog", tokens.get(4).getTerm());
+
+        request.analyzer(null);
+        request.tokenizer("trigram");
+        request.addTokenFilter("synonym");
+        request.text("kimchy");
+        analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment);
+        tokens = analyze.getTokens();
+        assertEquals(2, tokens.size());
+        assertEquals("sha", tokens.get(0).getTerm());
+        assertEquals("hay", tokens.get(1).getTerm());
     }
 
     public void testGetIndexAnalyserWithoutAnalysisService() throws IOException {