Browse Source

Merge pull request #18948 from jimferenczi/bm25

Change default similarity to BM25
Jim Ferenczi 9 years ago
parent
commit
cc91014dee

+ 3 - 3
core/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java

@@ -36,7 +36,7 @@ import java.util.function.BiFunction;
 
 public final class SimilarityService extends AbstractIndexComponent {
 
-    public final static String DEFAULT_SIMILARITY = "classic";
+    public final static String DEFAULT_SIMILARITY = "BM25";
     private final Similarity defaultSimilarity;
     private final Similarity baseSimilarity;
     private final Map<String, SimilarityProvider> similarities;
@@ -121,8 +121,8 @@ public final class SimilarityService extends AbstractIndexComponent {
         return similarities.get(name);
     }
 
-    public SimilarityProvider getDefaultSimilarity() {
-        return similarities.get("default");
+    Similarity getDefaultSimilarity() {
+        return defaultSimilarity;
     }
 
     static class PerFieldSimilarity extends PerFieldSimilarityWrapper {

+ 13 - 3
core/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java

@@ -18,6 +18,8 @@
  */
 package org.elasticsearch.index.similarity;
 
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.settings.Settings;
@@ -27,7 +29,15 @@ import org.elasticsearch.test.IndexSettingsModule;
 
 import java.util.Collections;
 
+import static org.hamcrest.Matchers.instanceOf;
+
 public class SimilarityServiceTests extends ESTestCase {
+    public void testDefaultSimilarity() {
+        Settings settings = Settings.builder().build();
+        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
+        SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
+        assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
+    }
 
     // Tests #16594
     public void testOverrideBuiltInSimilarity() {
@@ -53,10 +63,10 @@ public class SimilarityServiceTests extends ESTestCase {
     }
 
     // Tests #16594
-    public void testDefaultSimilarity() {
-        Settings settings = Settings.builder().put("index.similarity.default.type", "BM25").build();
+    public void testOverrideDefaultSimilarity() {
+        Settings settings = Settings.builder().put("index.similarity.default.type", "classic").build();
         IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
         SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
-        assertTrue(service.getDefaultSimilarity() instanceof BM25SimilarityProvider);
+        assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
     }
 }

+ 17 - 17
docs/reference/index-modules/similarity.asciidoc

@@ -47,25 +47,11 @@ Here we configure the DFRSimilarity so it can be referenced as
 [float]
 === Available similarities
 
-[float]
-[[classic-similarity]]
-==== Classic similarity
-
-The classic similarity that is based on the TF/IDF model. This
-similarity has the following option:
-
-`discount_overlaps`::
-    Determines whether overlap tokens (Tokens with
-    0 position increment) are ignored when computing norm. By default this
-    is true, meaning overlap tokens do not count when computing norms.
-
-Type name: `classic`
-
 [float]
 [[bm25]]
-==== BM25 similarity
+==== BM25 similarity (*default*)
 
-Another TF/IDF based similarity that has built-in tf normalization and
+TF/IDF-based similarity that has built-in tf normalization and
 is supposed to work better for short fields (like names). See
 http://en.wikipedia.org/wiki/Okapi_BM25[Okapi_BM25] for more details.
 This similarity has the following options:
@@ -86,6 +72,20 @@ This similarity has the following options:
 
 Type name: `BM25`
 
+[float]
+[[classic-similarity]]
+==== Classic similarity
+
+The classic similarity that is based on the TF/IDF model. This
+similarity has the following option:
+
+`discount_overlaps`::
+    Determines whether overlap tokens (tokens with a position increment
+    of 0) are ignored when computing norms. By default this is true,
+    meaning overlap tokens do not count when computing norms.
+
+Type name: `classic`
+
 [float]
 [[drf]]
 ==== DFR similarity
@@ -178,5 +178,5 @@ You can change the default similarity for all fields by putting the following se
 
 [source,js]
 --------------------------------------------------
-index.similarity.default.type: BM25
+index.similarity.default.type: classic
 --------------------------------------------------

+ 4 - 0
docs/reference/migration/migrate_5_0/search.asciidoc

@@ -196,3 +196,7 @@ The <<search-request-preference,search preference>> `_prefer_node` has
 been superseded by `_prefer_nodes`. By specifying a single node,
 `_prefer_nodes` provides the same functionality as `_prefer_node` but
 also supports specifying multiple nodes.
+
+==== Default similarity
+
+The default similarity has been changed from `classic` to `BM25`.