Browse Source

Finish exposing FlattenGraphTokenFilter (#22667)

Michael McCandless 8 years ago
parent
commit
1d1bdd476c

+ 5 - 5
core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java

@@ -166,8 +166,8 @@ public final class AnalysisRegistry implements Closeable {
          * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
          * hide internal data-structures as much as possible.
          */
-        tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
-        tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
+        tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
+        tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
         return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
     }
 
@@ -229,9 +229,9 @@ public final class AnalysisRegistry implements Closeable {
              * hide internal data-structures as much as possible.
              */
             if ("synonym".equals(typeName)) {
-                return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
+                return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
             } else if ("synonym_graph".equals(typeName)) {
-                return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
+                return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
             } else {
                 return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
             }
@@ -258,7 +258,7 @@ public final class AnalysisRegistry implements Closeable {
         }
     }
 
-    private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
+    private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
         return new AnalysisModule.AnalysisProvider<T>() {
             @Override
             public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {

+ 2 - 0
core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

@@ -60,6 +60,7 @@ import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
 import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
+import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
 import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -226,6 +227,7 @@ public final class AnalysisModule {
         tokenFilters.register("word_delimiter", WordDelimiterTokenFilterFactory::new);
         tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
         tokenFilters.register("elision", ElisionTokenFilterFactory::new);
+        tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
         tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
         tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
         tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));

+ 2 - 0
docs/reference/analysis/tokenfilters.asciidoc

@@ -13,6 +13,8 @@ include::tokenfilters/standard-tokenfilter.asciidoc[]
 
 include::tokenfilters/asciifolding-tokenfilter.asciidoc[]
 
+include::tokenfilters/flatten-graph-tokenfilter.asciidoc[]
+
 include::tokenfilters/length-tokenfilter.asciidoc[]
 
 include::tokenfilters/lowercase-tokenfilter.asciidoc[]

+ 20 - 0
docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc

@@ -0,0 +1,20 @@
+[[analysis-flatten-graph-tokenfilter]]
+=== Flatten Graph Token Filter
+
+experimental[]
+
+The `flatten_graph` token filter accepts an arbitrary graph token
+stream, such as that produced by
+<<analysis-synonym-graph-tokenfilter>>, and flattens it into a single
+linear chain of tokens suitable for indexing.
+
+This is a lossy process, as separate side paths are squashed on top of
+one another, but it is necessary if you use a graph token stream
+during indexing because a Lucene index cannot currently represent a
+graph.  For this reason, it's best to apply graph analyzers only at
+search time because that preserves the full graph structure and gives
+correct matches for proximity queries.
+
+For more information on this topic and its various complexities,
+please read the
+http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post.

+ 2 - 3
docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc

@@ -8,9 +8,8 @@ including multi-word synonyms correctly during the analysis process.
 
 In order to properly handle multi-word synonyms this token filter
 creates a "graph token stream" during processing.  For more information
-on this topic and it's various complexities, please read
-http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs!]
-by Michael McCandless.
+on this topic and its various complexities, please read the
+http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post.
 
 ["NOTE",id="synonym-graph-index-note"]
 ===============================