Browse Source

Avoid negative scores with cross_fields type (#89016)

The cross_fields scoring type can produce negative scores when some documents
are missing fields. When blending term document frequencies, we take the maximum
document frequency across all fields. If one field appears in fewer documents
than another, the blended document frequency can exceed that field's document
count, which makes its IDF negative. This is because IDF is calculated as
`Math.log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5))`, which is negative
whenever `docFreq > docCount`.

This change adjusts the docFreq for each field to `Math.min(docCount, docFreq)`
so that the IDF can never become negative. It makes sense that the term document
frequency should never exceed the number of documents containing the field.
Julie Tibshirani 3 years ago
parent
commit
3c1b070329

+ 6 - 0
docs/changelog/89016.yaml

@@ -0,0 +1,6 @@
+pr: 89016
+summary: Avoid negative scores with `cross_fields` type
+area: Ranking
+type: bug
+issues:
+ - 44700

+ 6 - 5
docs/reference/query-dsl/multi-match-query.asciidoc

@@ -388,11 +388,12 @@ explanation:
 Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`,
 `lenient` and `zero_terms_query`.
 
-WARNING: The `cross_fields` type blends field statistics in a way that does
-not always produce well-formed scores (for example scores can become
-negative). As an alternative, you can consider the
-<<query-dsl-combined-fields-query,`combined_fields`>> query, which is also
-term-centric but combines field statistics in a more robust way.
+WARNING: The `cross_fields` type blends field statistics in a complex way that
+can be hard to interpret. The score combination can even be incorrect, in
+particular when some documents contain some of the search fields, but not all
+of them. You should consider the
+<<query-dsl-combined-fields-query,`combined_fields`>> query as an alternative,
+which is also term-centric but combines field statistics in a more robust way.
 
 [[cross-field-analysis]]
 ===== `cross_field` and analysis

+ 4 - 1
server/src/main/java/org/elasticsearch/lucene/queries/BlendedTermQuery.java

@@ -148,7 +148,10 @@ public abstract class BlendedTermQuery extends Query {
             if (prev > current) {
                 actualDf++;
             }
-            contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf));
+
+            int docCount = reader.getDocCount(terms[i].field());
+            int newDocFreq = Math.min(actualDf, docCount);
+            contexts[i] = ctx = adjustDF(reader.getContext(), ctx, newDocFreq);
             prev = current;
             sumTTF += ctx.totalTermFreq();
         }

+ 33 - 0
server/src/test/java/org/elasticsearch/lucene/queries/BlendedTermQueryTests.java

@@ -248,6 +248,39 @@ public class BlendedTermQueryTests extends ESTestCase {
         dir.close();
     }
 
+    public void testMissingFields() throws IOException {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
+        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+        ft.freeze();
+
+        for (int i = 0; i < 10; i++) {
+            Document d = new Document();
+            d.add(new TextField("id", Integer.toString(i), Field.Store.YES));
+            d.add(new Field("dense", "foo", ft));
+            // Add a sparse field with high totalTermFreq but low docCount
+            if (i % 5 == 0) {
+                d.add(new Field("sparse", "foo", ft));
+                d.add(new Field("sparse", "one two three four five size", ft));
+            }
+            w.addDocument(d);
+        }
+        w.commit();
+
+        DirectoryReader reader = DirectoryReader.open(w);
+        IndexSearcher searcher = setSimilarity(newSearcher(reader));
+
+        String[] fields = new String[] { "dense", "sparse" };
+        Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 0.1f);
+        TopDocs search = searcher.search(query, 10);
+        ScoreDoc[] scoreDocs = search.scoreDocs;
+        assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue());
+
+        reader.close();
+        w.close();
+        dir.close();
+    }
+
     public void testEqualsAndHash() {
         String[] fields = new String[1 + random().nextInt(10)];
         for (int i = 0; i < fields.length; i++) {