Browse Source

Update lucene to r1725675

Adds DFI (divergence from independence) provider.
Fixes test bugs passing invalid values for BM25 parameters.
Robert Muir 9 years ago
parent
commit
6e7e3a2274
50 changed files with 107 additions and 27 deletions
  1. 1 1
      buildSrc/version.properties
  2. 51 0
      core/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java
  3. 1 0
      core/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java
  4. 1 1
      core/src/main/resources/org/elasticsearch/bootstrap/security.policy
  5. 1 1
      core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy
  6. 2 0
      core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java
  7. 21 2
      core/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java
  8. 1 1
      core/src/test/java/org/elasticsearch/similarity/SimilarityIT.java
  9. 0 1
      distribution/licenses/lucene-analyzers-common-5.5.0-snapshot-1721183.jar.sha1
  10. 1 0
      distribution/licenses/lucene-analyzers-common-5.5.0-snapshot-1725675.jar.sha1
  11. 0 1
      distribution/licenses/lucene-backward-codecs-5.5.0-snapshot-1721183.jar.sha1
  12. 1 0
      distribution/licenses/lucene-backward-codecs-5.5.0-snapshot-1725675.jar.sha1
  13. 0 1
      distribution/licenses/lucene-core-5.5.0-snapshot-1721183.jar.sha1
  14. 1 0
      distribution/licenses/lucene-core-5.5.0-snapshot-1725675.jar.sha1
  15. 0 1
      distribution/licenses/lucene-grouping-5.5.0-snapshot-1721183.jar.sha1
  16. 1 0
      distribution/licenses/lucene-grouping-5.5.0-snapshot-1725675.jar.sha1
  17. 0 1
      distribution/licenses/lucene-highlighter-5.5.0-snapshot-1721183.jar.sha1
  18. 1 0
      distribution/licenses/lucene-highlighter-5.5.0-snapshot-1725675.jar.sha1
  19. 0 1
      distribution/licenses/lucene-join-5.5.0-snapshot-1721183.jar.sha1
  20. 1 0
      distribution/licenses/lucene-join-5.5.0-snapshot-1725675.jar.sha1
  21. 0 1
      distribution/licenses/lucene-memory-5.5.0-snapshot-1721183.jar.sha1
  22. 1 0
      distribution/licenses/lucene-memory-5.5.0-snapshot-1725675.jar.sha1
  23. 0 1
      distribution/licenses/lucene-misc-5.5.0-snapshot-1721183.jar.sha1
  24. 1 0
      distribution/licenses/lucene-misc-5.5.0-snapshot-1725675.jar.sha1
  25. 0 1
      distribution/licenses/lucene-queries-5.5.0-snapshot-1721183.jar.sha1
  26. 1 0
      distribution/licenses/lucene-queries-5.5.0-snapshot-1725675.jar.sha1
  27. 0 1
      distribution/licenses/lucene-queryparser-5.5.0-snapshot-1721183.jar.sha1
  28. 1 0
      distribution/licenses/lucene-queryparser-5.5.0-snapshot-1725675.jar.sha1
  29. 0 1
      distribution/licenses/lucene-sandbox-5.5.0-snapshot-1721183.jar.sha1
  30. 1 0
      distribution/licenses/lucene-sandbox-5.5.0-snapshot-1725675.jar.sha1
  31. 0 1
      distribution/licenses/lucene-spatial-5.5.0-snapshot-1721183.jar.sha1
  32. 1 0
      distribution/licenses/lucene-spatial-5.5.0-snapshot-1725675.jar.sha1
  33. 0 1
      distribution/licenses/lucene-spatial3d-5.5.0-snapshot-1721183.jar.sha1
  34. 1 0
      distribution/licenses/lucene-spatial3d-5.5.0-snapshot-1725675.jar.sha1
  35. 0 1
      distribution/licenses/lucene-suggest-5.5.0-snapshot-1721183.jar.sha1
  36. 1 0
      distribution/licenses/lucene-suggest-5.5.0-snapshot-1725675.jar.sha1
  37. 7 0
      docs/reference/index-modules/similarity.asciidoc
  38. 1 1
      docs/reference/mapping/params/similarity.asciidoc
  39. 0 1
      modules/lang-expression/licenses/lucene-expressions-5.5.0-snapshot-1721183.jar.sha1
  40. 1 0
      modules/lang-expression/licenses/lucene-expressions-5.5.0-snapshot-1725675.jar.sha1
  41. 0 1
      plugins/analysis-icu/licenses/lucene-analyzers-icu-5.5.0-snapshot-1721183.jar.sha1
  42. 1 0
      plugins/analysis-icu/licenses/lucene-analyzers-icu-5.5.0-snapshot-1725675.jar.sha1
  43. 0 1
      plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-5.5.0-snapshot-1721183.jar.sha1
  44. 1 0
      plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-5.5.0-snapshot-1725675.jar.sha1
  45. 0 1
      plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-5.5.0-snapshot-1721183.jar.sha1
  46. 1 0
      plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-5.5.0-snapshot-1725675.jar.sha1
  47. 0 1
      plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-5.5.0-snapshot-1721183.jar.sha1
  48. 1 0
      plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-5.5.0-snapshot-1725675.jar.sha1
  49. 0 1
      plugins/analysis-stempel/licenses/lucene-analyzers-stempel-5.5.0-snapshot-1721183.jar.sha1
  50. 1 0
      plugins/analysis-stempel/licenses/lucene-analyzers-stempel-5.5.0-snapshot-1725675.jar.sha1

+ 1 - 1
buildSrc/version.properties

@@ -1,5 +1,5 @@
 elasticsearch     = 3.0.0-SNAPSHOT
-lucene            = 5.5.0-snapshot-1721183
+lucene            = 5.5.0-snapshot-1725675
 
 # optional dependencies
 spatial4j         = 0.5

+ 51 - 0
core/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java

@@ -0,0 +1,51 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.similarity;
+
+import org.apache.lucene.search.similarities.DFISimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.elasticsearch.common.settings.Settings;
+
+/**
+ * {@link SimilarityProvider} that builds and exposes a single {@link DFISimilarity}.
+ * <p>
+ * Configuration options available:
+ * <ul>
+ *     <li>discount_overlaps: boolean, falls back to {@code true} when unset</li>
+ * </ul>
+ * @see DFISimilarity For more information about configuration
+ */
+public class DFISimilarityProvider extends AbstractSimilarityProvider {
+
+    private final DFISimilarity similarity;
+
+    public DFISimilarityProvider(String name, Settings settings) {
+        super(name);
+        boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
+
+        this.similarity = new DFISimilarity();
+        this.similarity.setDiscountOverlaps(discountOverlaps);
+    }
+
+    @Override
+    public Similarity get() {
+        return similarity;
+    }
+}

+ 1 - 0
core/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java

@@ -52,6 +52,7 @@ public final class SimilarityService extends AbstractIndexComponent {
         buildIn.put("IB", IBSimilarityProvider::new);
         buildIn.put("LMDirichlet", LMDirichletSimilarityProvider::new);
         buildIn.put("LMJelinekMercer", LMJelinekMercerSimilarityProvider::new);
+        buildIn.put("DFI", DFISimilarityProvider::new);
         DEFAULTS = Collections.unmodifiableMap(defaults);
         BUILT_IN = Collections.unmodifiableMap(buildIn);
     }

+ 1 - 1
core/src/main/resources/org/elasticsearch/bootstrap/security.policy

@@ -31,7 +31,7 @@ grant codeBase "${codebase.securesm-1.0.jar}" {
 //// Very special jar permissions:
 //// These are dangerous permissions that we don't want to grant to everything.
 
-grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1721183.jar}" {
+grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1725675.jar}" {
   // needed to allow MMapDirectory's "unmap hack"
   permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
   permission java.lang.reflect.ReflectPermission "suppressAccessChecks";

+ 1 - 1
core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy

@@ -31,7 +31,7 @@ grant codeBase "${codebase.securemock-1.2.jar}" {
   permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
 };
 
-grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1721183.jar}" {
+grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1725675.jar}" {
   // needed by RamUsageTester
   permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
 };

+ 2 - 0
core/src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java

@@ -179,6 +179,8 @@ public class AnalysisFactoryTests extends ESTestCase {
         put("typeaspayload",             Void.class);
         // fingerprint
         put("fingerprint",               Void.class);
+        // for tee-sinks
+        put("daterecognizer",            Void.class);
     }};
     
     public void testTokenFilters() {

+ 21 - 2
core/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.similarity;
 
 import org.apache.lucene.search.similarities.ClassicSimilarity;
+import org.apache.lucene.search.similarities.DFISimilarity;
 import org.apache.lucene.search.similarities.AfterEffectL;
 import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.BasicModelG;
@@ -38,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.DocumentMapperParser;
+import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESSingleNodeTestCase;
@@ -93,7 +95,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
         Settings indexSettings = Settings.settingsBuilder()
             .put("index.similarity.my_similarity.type", "BM25")
             .put("index.similarity.my_similarity.k1", 2.0f)
-            .put("index.similarity.my_similarity.b", 1.5f)
+            .put("index.similarity.my_similarity.b", 0.5f)
             .put("index.similarity.my_similarity.discount_overlaps", false)
             .build();
         IndexService indexService = createIndex("foo", indexSettings);
@@ -102,7 +104,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
 
         BM25Similarity similarity = (BM25Similarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
         assertThat(similarity.getK1(), equalTo(2.0f));
-        assertThat(similarity.getB(), equalTo(1.5f));
+        assertThat(similarity.getB(), equalTo(0.5f));
         assertThat(similarity.getDiscountOverlaps(), equalTo(false));
     }
 
@@ -156,6 +158,23 @@ public class SimilarityTests extends ESSingleNodeTestCase {
         assertThat(((NormalizationH2) similarity.getNormalization()).getC(), equalTo(3f));
     }
 
+    public void testResolveSimilaritiesFromMapping_DFI() throws IOException {
+        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+            .startObject("properties")
+            .startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
+            .endObject()
+            .endObject().endObject().string();
+
+        Settings indexSettings = Settings.settingsBuilder()
+            .put("index.similarity.my_similarity.type", "DFI")
+            .build();
+        IndexService indexService = createIndex("foo", indexSettings);
+        DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
+        MappedFieldType fieldType = documentMapper.mappers().getMapper("field1").fieldType();
+        assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class));
+        assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class));
+    }
+
     public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
         String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
             .startObject("properties")

+ 1 - 1
core/src/test/java/org/elasticsearch/similarity/SimilarityIT.java

@@ -55,7 +55,7 @@ public class SimilarityIT extends ESIntegTestCase {
                         .put("index.number_of_replicas", 0)
                         .put("similarity.custom.type", "BM25")
                         .put("similarity.custom.k1", 2.0f)
-                        .put("similarity.custom.b", 1.5f)
+                        .put("similarity.custom.b", 0.5f)
                 ).execute().actionGet();
 
         client().prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox jumped over the lazy dog",

+ 0 - 1
distribution/licenses/lucene-analyzers-common-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-69e187ef1d2d9c9570363eb4186821e0341df5b8

+ 1 - 0
distribution/licenses/lucene-analyzers-common-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+528a695bb8882dbc3d9866335ac1bb3905cba4e3

+ 0 - 1
distribution/licenses/lucene-backward-codecs-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-0fa00a45ff9bc6a4df44db81f2e4e44ea94bf88e

+ 1 - 0
distribution/licenses/lucene-backward-codecs-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+3fb1bcc1001a10b74ae91848c8558572891c1409

+ 0 - 1
distribution/licenses/lucene-core-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-f6854c65c7f4c6d9de583f4daa4fd3ae8a3800f1

+ 1 - 0
distribution/licenses/lucene-core-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+9eff7f186877882f8b68f031f610bd7ab8c5c1fb

+ 0 - 1
distribution/licenses/lucene-grouping-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-e996e6c723eb415ba2cfa7f5e98bbf194a4918dd

+ 1 - 0
distribution/licenses/lucene-grouping-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+6e6253936522f27b35ba4d8485806f517ef2df45

+ 0 - 1
distribution/licenses/lucene-highlighter-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-3b7a5d97b10885f16eb53deb15d64c942b9f9fdb

+ 1 - 0
distribution/licenses/lucene-highlighter-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+8a313aa34b0070d3f7d48005e7677b680db1b09d

+ 0 - 1
distribution/licenses/lucene-join-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-e4dda3eeb76e340aa4713a3b20d68c4a1504e505

+ 1 - 0
distribution/licenses/lucene-join-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+bf4c5a17cfb265d321ef4cfb0f3d7c1a6a6651de

+ 0 - 1
distribution/licenses/lucene-memory-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-800442a5d7612ce4c8748831871b4d436a50554e

+ 1 - 0
distribution/licenses/lucene-memory-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+2713a319d0aa696c65a32a36fda830bc482a5880

+ 0 - 1
distribution/licenses/lucene-misc-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-bdf184de9b5773c7af3ae908af78eeb1e512470c

+ 1 - 0
distribution/licenses/lucene-misc-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+88251ecdbf877c15a94d4013aa5157f5b5ce4cea

+ 0 - 1
distribution/licenses/lucene-queries-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-fc59de52bd2c7e420edfd235723cb8b0dd44e92d

+ 1 - 0
distribution/licenses/lucene-queries-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+bf9e522244c7c4eee6c3bcc3212ff057f7b88000

+ 0 - 1
distribution/licenses/lucene-queryparser-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-1d341e6a4f11f3170773ccffdbe6815b45967e3d

+ 1 - 0
distribution/licenses/lucene-queryparser-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+12d71cf10a4b79231dc488af16d723dfca5ab64b

+ 0 - 1
distribution/licenses/lucene-sandbox-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-a1b02c2b595ac92f45f0d2be03841a3a7fcae1f1

+ 1 - 0
distribution/licenses/lucene-sandbox-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+f903d67d042904527a7e2e8a75c55afe36a04251

+ 0 - 1
distribution/licenses/lucene-spatial-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-e3ea422b56734329fb6974e9cf9f66478adb5793

+ 1 - 0
distribution/licenses/lucene-spatial-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+2f5758bbcf97048ab62d2d4ae73867d06f1ed03f

+ 0 - 1
distribution/licenses/lucene-spatial3d-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-5eadbd4e63120b59ab6445e39489205f98420471

+ 1 - 0
distribution/licenses/lucene-spatial3d-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+2cc29e4658be151658fac6e5ed7915982b6de861

+ 0 - 1
distribution/licenses/lucene-suggest-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-a336287e65d082535f02a8427666dbe46b1b9b74

+ 1 - 0
distribution/licenses/lucene-suggest-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+f490a09ca056aba42e8751a469ef114df64aae0d

+ 7 - 0
docs/reference/index-modules/similarity.asciidoc

@@ -107,6 +107,13 @@ All options but the first option need a normalization value.
 
 Type name: `DFR`
 
+[float]
+[[dfi]]
+==== DFI similarity
+
+Similarity that implements the http://trec.nist.gov/pubs/trec21/papers/irra.web.nb.pdf[divergence from independence] 
+model (normalized chi-squared distance).
+
 [float]
 [[ib]]
 ==== IB similarity.

+ 1 - 1
docs/reference/mapping/params/similarity.asciidoc

@@ -8,7 +8,7 @@ algorithm other than the default TF/IDF, such as `BM25`.
 Similarities are mostly useful for <<string,`string`>> fields, especially
 `analyzed` string fields, but can also apply to other field types.
 
-Custom similarites can be configured by tuning the parameters of the built-in
+Custom similarities can be configured by tuning the parameters of the built-in
 similarities. For more details about these expert options, see the
 <<index-modules-similarity,similarity module>>.
 

+ 0 - 1
modules/lang-expression/licenses/lucene-expressions-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-60e056d2dd04a81440482b047af0737bc41593d9

+ 1 - 0
modules/lang-expression/licenses/lucene-expressions-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+31db8e49e4089772eae8ab2db0ac59bab6fbcd2f

+ 0 - 1
plugins/analysis-icu/licenses/lucene-analyzers-icu-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-1fce4e9b5c4482bb95e8b275c825d112640d6f1e

+ 1 - 0
plugins/analysis-icu/licenses/lucene-analyzers-icu-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+4504d3d993f094ed70585124df56c2be86c2615a

+ 0 - 1
plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-f104f306fef9d3033db026705043e9cbd145aba5

+ 1 - 0
plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+15555d41d27bb398b6736be85a5eca4ca224b85d

+ 0 - 1
plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-40b2034a6aed4c3fe0509016fab4f7bbb37a5fc8

+ 1 - 0
plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+9d43a338338a6c88e8071a0e3eeb51f4d9d0364a

+ 0 - 1
plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-e117a87f4338be80b0a052d2ce454d5086aa57f1

+ 1 - 0
plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+b66c95032c5ca41ce7b85519c64aab4e9a233f78

+ 0 - 1
plugins/analysis-stempel/licenses/lucene-analyzers-stempel-5.5.0-snapshot-1721183.jar.sha1

@@ -1 +0,0 @@
-703dd91fccdc1c4662c80e412a449097c0578d83

+ 1 - 0
plugins/analysis-stempel/licenses/lucene-analyzers-stempel-5.5.0-snapshot-1725675.jar.sha1

@@ -0,0 +1 @@
+4f41bacd77ce372f10f2c57ab516b2ce9aa71173