Browse Source

Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10 (#113143) (#113815)

Lucene 10 has upgraded its Snowball stemming support. As part of those
upgrades, two stemmers that are no longer supported were removed: `KpStemmer`
and `LovinsStemmer`. These correspond to `dutch_kp` and `lovins`, respectively.

We will deprecate them in 8.16 and remove support for them in a future
version.

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Benjamin Trent 1 year ago
parent
commit
6902296282

+ 10 - 0
docs/changelog/113143.yaml

@@ -0,0 +1,10 @@
+pr: 113143
+summary: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
+area: Analysis
+type: deprecation
+issues: []
+deprecation:
+  title: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
+  area: Analysis
+  details: kp, dutch_kp, dutchKp and lovins stemmers are deprecated and will be removed.
+  impact: These stemmers will be removed and will no longer be supported.

+ 3 - 1
docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc

@@ -11,6 +11,8 @@ values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`,
 `Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`,
 `Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`.
 
+deprecated:[8.16.0, `Kp` and `Lovins` support will be removed in a future version]
+
 For example:
 
 [source,console]
@@ -28,7 +30,7 @@ PUT /my-index-000001
       "filter": {
         "my_snow": {
           "type": "snowball",
-          "language": "Lovins"
+          "language": "English"
         }
       }
     }

+ 2 - 2
docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc

@@ -144,12 +144,12 @@ https://snowballstem.org/algorithms/danish/stemmer.html[*`danish`*]
 
 Dutch::
 https://snowballstem.org/algorithms/dutch/stemmer.html[*`dutch`*],
-https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`]
+https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] deprecated:[8.16.0, `dutch_kp` will be removed in a future version]
 
 English::
 https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*],
 https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`],
-https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`],
+https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`] deprecated:[8.16.0, `lovins` will be removed in a future version],
 https://www.researchgate.net/publication/220433848_How_effective_is_suffixing[`minimal_english`],
 https://snowballstem.org/algorithms/english/stemmer.html[`porter2`],
 {lucene-analysis-docs}/en/EnglishPossessiveFilter.html[`possessive_english`]

+ 18 - 0
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java

@@ -47,6 +47,8 @@ import org.apache.lucene.analysis.ru.RussianLightStemFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.logging.DeprecationCategory;
+import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
@@ -81,6 +83,8 @@ import java.io.IOException;
 
 public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
 
+    private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(StemmerTokenFilterFactory.class);
+
     private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();
 
     private String language;
@@ -90,6 +94,20 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
         this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
         // check that we have a valid language by trying to create a TokenStream
         create(EMPTY_TOKEN_STREAM).close();
+        if ("lovins".equalsIgnoreCase(language)) {
+            deprecationLogger.critical(
+                DeprecationCategory.ANALYSIS,
+                "lovins_deprecation",
+                "The [lovins] stemmer is deprecated and will be removed in a future version."
+            );
+        }
+        if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) {
+            deprecationLogger.critical(
+                DeprecationCategory.ANALYSIS,
+                "dutch_kp_deprecation",
+                "The [dutch_kp] stemmer is deprecated and will be removed in a future version."
+            );
+        }
     }
 
     @Override

+ 26 - 1
modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactoryTests.java

@@ -32,7 +32,6 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_C
 import static org.hamcrest.Matchers.instanceOf;
 
 public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
-
     private static final CommonAnalysisPlugin PLUGIN = new CommonAnalysisPlugin();
 
     public void testEnglishFilterFactory() throws IOException {
@@ -103,4 +102,30 @@ public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {
         );
         assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
     }
+
+    public void testKpDeprecation() throws IOException {
+        IndexVersion v = IndexVersionUtils.randomVersion(random());
+        Settings settings = Settings.builder()
+            .put("index.analysis.filter.my_kp.type", "stemmer")
+            .put("index.analysis.filter.my_kp.language", "kp")
+            .put(SETTING_VERSION_CREATED, v)
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .build();
+
+        AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
+        assertCriticalWarnings("The [dutch_kp] stemmer is deprecated and will be removed in a future version.");
+    }
+
+    public void testLovinsDeprecation() throws IOException {
+        IndexVersion v = IndexVersionUtils.randomVersion(random());
+        Settings settings = Settings.builder()
+            .put("index.analysis.filter.my_lovins.type", "stemmer")
+            .put("index.analysis.filter.my_lovins.language", "lovins")
+            .put(SETTING_VERSION_CREATED, v)
+            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .build();
+
+        AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
+        assertCriticalWarnings("The [lovins] stemmer is deprecated and will be removed in a future version.");
+    }
 }