2 years ago · a8cf4d6006
--- a/docs/changelog/96588.yaml
+++ b/docs/changelog/96588.yaml
@@ -0,0 +1,5 @@
 
				+pr: 96588
			
 
				+summary: Support for pattern_replace filter in keyword normalizer
			
 
				+area: Search
			
 
				+type: enhancement
			
 
				+issues: []
			
--- a/docs/reference/analysis/normalizers.asciidoc
+++ b/docs/reference/analysis/normalizers.asciidoc
@@ -9,7 +9,7 @@ allowed, but not a stemming filter, which needs to look at the keyword as a
 
				 whole. The current list of filters that can be used in a normalizer is
			
 
				 following: `arabic_normalization`, `asciifolding`, `bengali_normalization`,
			
 
				 `cjk_width`, `decimal_digit`, `elision`, `german_normalization`,
			
 
				-`hindi_normalization`, `indic_normalization`, `lowercase`,
			
 
				+`hindi_normalization`, `indic_normalization`, `lowercase`, `pattern_replace`,
			
 
				 `persian_normalization`, `scandinavian_folding`, `serbian_normalization`,
			
 
				 `sorani_normalization`, `uppercase`.
			
 
				 
			
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java
@@ -15,10 +15,11 @@ import org.elasticsearch.common.settings.Settings;
 
				 import org.elasticsearch.env.Environment;
			
 
				 import org.elasticsearch.index.IndexSettings;
			
 
				 import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
			
 
				+import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
			
 
				 
			
 
				 import java.util.regex.Pattern;
			
 
				 
			
 
				-public class PatternReplaceTokenFilterFactory extends AbstractTokenFilterFactory {
			
 
				+public class PatternReplaceTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
			
 
				 
			
 
				     private final Pattern pattern;
			
 
				     private final String replacement;
			
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterTests.java
@@ -0,0 +1,41 @@
 
				+/*
			
 
				+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
			
 
				+ * or more contributor license agreements. Licensed under the Elastic License
			
 
				+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
			
 
				+ * in compliance with, at your election, the Elastic License 2.0 or the Server
			
 
				+ * Side Public License, v 1.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.analysis.common;
			
 
				+
			
 
				+import org.apache.lucene.util.BytesRef;
			
 
				+import org.elasticsearch.common.settings.Settings;
			
 
				+import org.elasticsearch.env.Environment;
			
 
				+import org.elasticsearch.index.analysis.AnalysisTestsHelper;
			
 
				+import org.elasticsearch.index.analysis.NamedAnalyzer;
			
 
				+import org.elasticsearch.test.ESTestCase;
			
 
				+import org.elasticsearch.test.ESTokenStreamTestCase;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+
			
 
				+public class PatternReplaceTokenFilterTests extends ESTokenStreamTestCase {
			
 
				+
			
 
				+    public void testNormalizer() throws IOException {
			
 
				+        Settings settings = Settings.builder()
			
 
				+            .putList("index.analysis.normalizer.my_normalizer.filter", "replace_zeros")
			
 
				+            .put("index.analysis.filter.replace_zeros.type", "pattern_replace")
			
 
				+            .put("index.analysis.filter.replace_zeros.pattern", "0+")
			
 
				+            .put("index.analysis.filter.replace_zeros.replacement", "")
			
 
				+            .put("index.analysis.filter.replace_zeros.all", true)
			
 
				+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
			
 
				+            .build();
			
 
				+        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
			
 
				+        assertNull(analysis.indexAnalyzers.get("my_normalizer"));
			
 
				+        NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
			
 
				+        assertNotNull(normalizer);
			
 
				+        assertEquals("my_normalizer", normalizer.name());
			
 
				+        assertTokenStreamContents(normalizer.tokenStream("foo", "0000111"), new String[] { "111" });
			
 
				+        assertEquals(new BytesRef("111"), normalizer.normalize("foo", "0000111"));
			
 
				+    }
			
 
				+
			
 
				+}
			
--- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/40_token_filters.yml
@@ -1683,3 +1683,44 @@
 
				     - length: { tokens: 6 }
			
 
				     - match: { tokens.0.token: the }
			
 
				     - match: { tokens.1.token: THE }
			
 
				+
			
 
				+---
			
 
				+"pattern_replace_filter":
			
 
				+  - do:
			
 
				+      indices.create:
			
 
				+        index: test
			
 
				+        body:
			
 
				+          settings:
			
 
				+            analysis:
			
 
				+              normalizer:
			
 
				+                my_normalizer:
			
 
				+                  type: custom
			
 
				+                  filter: ["replace_zeros"]
			
 
				+              filter:
			
 
				+                replace_zeros:
			
 
				+                  type: pattern_replace
			
 
				+                  pattern: "0+"
			
 
				+                  replacement: ""
			
 
				+                  all: true
			
 
				+          mappings:
			
 
				+            properties:
			
 
				+              pagerank:
			
 
				+                type: keyword
			
 
				+                normalizer: my_normalizer
			
 
				+
			
 
				+  - do:
			
 
				+      index:
			
 
				+        index:  test
			
 
				+        id:     "1"
			
 
				+        body:   { pagerank: "000000111"}
			
 
				+
			
 
				+  - do:
			
 
				+      indices.refresh:
			
 
				+        index: [ test ]
			
 
				+
			
 
				+  - do:
			
 
				+      search:
			
 
				+        index: test
			
 
				+        q: pagerank:111
			
 
				+
			
 
				+  - match: {hits.total.value: 1}