|
@@ -19,8 +19,20 @@
|
|
|
|
|
|
package org.apache.lucene.queries;
|
|
|
|
|
|
+import org.apache.lucene.index.IndexReader;
|
|
|
+import org.apache.lucene.index.Term;
|
|
|
+import org.apache.lucene.index.TermContext;
|
|
|
+import org.apache.lucene.search.BooleanClause;
|
|
|
import org.apache.lucene.search.BooleanClause.Occur;
|
|
|
+import org.apache.lucene.search.BooleanQuery;
|
|
|
+import org.apache.lucene.search.Query;
|
|
|
+import org.apache.lucene.search.TermQuery;
|
|
|
+import org.apache.lucene.util.Version;
|
|
|
+import org.elasticsearch.common.lucene.Lucene;
|
|
|
import org.elasticsearch.common.lucene.search.Queries;
|
|
|
+import org.elasticsearch.index.mapper.FieldMapper;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
|
|
|
/**
|
|
|
* Extended version of {@link CommonTermsQuery} that allows to pass in a
|
|
@@ -29,12 +41,11 @@ import org.elasticsearch.common.lucene.search.Queries;
|
|
|
*/
|
|
|
public class ExtendedCommonTermsQuery extends CommonTermsQuery {
|
|
|
|
|
|
- public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord) {
|
|
|
- super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
|
|
|
- }
|
|
|
+ private final FieldMapper<?> mapper;
|
|
|
|
|
|
- public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
|
|
|
- super(highFreqOccur, lowFreqOccur, maxTermFrequency);
|
|
|
/**
 * Creates the query. The occur settings, the term-frequency cut-off and the
 * coord flag are handed unchanged to {@link CommonTermsQuery}; the mapper is
 * kept so that {@link #newTermQuery(Term, TermContext)} can consult it.
 *
 * @param highFreqOccur    forwarded to the super constructor
 * @param lowFreqOccur     forwarded to the super constructor
 * @param maxTermFrequency forwarded to the super constructor
 * @param disableCoord     forwarded to the super constructor
 * @param mapper           field mapper asked to build per-term queries; may be
 *                         {@code null}, in which case plain {@link TermQuery}
 *                         instances are created
 */
public ExtendedCommonTermsQuery(
        Occur highFreqOccur,
        Occur lowFreqOccur,
        float maxTermFrequency,
        boolean disableCoord,
        FieldMapper<?> mapper) {
    super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
    this.mapper = mapper;
}
|
|
|
|
|
|
private String lowFreqMinNumShouldMatchSpec;
|
|
@@ -72,4 +83,94 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery {
|
|
|
public String getLowFreqMinimumNumberShouldMatchSpec() {
|
|
|
return lowFreqMinNumShouldMatchSpec;
|
|
|
}
|
|
|
+
|
|
|
+ // LUCENE-UPGRADE: remove this method if on 4.8
|
|
|
+ @Override
|
|
|
+ public Query rewrite(IndexReader reader) throws IOException {
|
|
|
+ if (this.terms.isEmpty()) {
|
|
|
+ return new BooleanQuery();
|
|
|
+ } else if (this.terms.size() == 1) {
|
|
|
+ final Query tq = newTermQuery(this.terms.get(0), null);
|
|
|
+ tq.setBoost(getBoost());
|
|
|
+ return tq;
|
|
|
+ }
|
|
|
+ return super.rewrite(reader);
|
|
|
+ }
|
|
|
+
|
|
|
+ // LUCENE-UPGRADE: remove this method if on 4.8
|
|
|
+ @Override
|
|
|
+ protected Query buildQuery(final int maxDoc,
|
|
|
+ final TermContext[] contextArray, final Term[] queryTerms) {
|
|
|
+ BooleanQuery lowFreq = new BooleanQuery(disableCoord);
|
|
|
+ BooleanQuery highFreq = new BooleanQuery(disableCoord);
|
|
|
+ highFreq.setBoost(highFreqBoost);
|
|
|
+ lowFreq.setBoost(lowFreqBoost);
|
|
|
+ BooleanQuery query = new BooleanQuery(true);
|
|
|
+ for (int i = 0; i < queryTerms.length; i++) {
|
|
|
+ TermContext termContext = contextArray[i];
|
|
|
+ if (termContext == null) {
|
|
|
+ lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
|
|
|
+ } else {
|
|
|
+ if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|
|
|
+ || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) {
|
|
|
+ highFreq.add(newTermQuery(queryTerms[i], termContext), highFreqOccur);
|
|
|
+ } else {
|
|
|
+ lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ final int numLowFreqClauses = lowFreq.clauses().size();
|
|
|
+ final int numHighFreqClauses = highFreq.clauses().size();
|
|
|
+ if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
|
|
|
+ int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
|
|
|
+ lowFreq.setMinimumNumberShouldMatch(minMustMatch);
|
|
|
+ }
|
|
|
+ if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
|
|
|
+ int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
|
|
|
+ highFreq.setMinimumNumberShouldMatch(minMustMatch);
|
|
|
+ }
|
|
|
+ if (lowFreq.clauses().isEmpty()) {
|
|
|
+ /*
|
|
|
+ * if lowFreq is empty we rewrite the high freq terms in a conjunction to
|
|
|
+ * prevent slow queries.
|
|
|
+ */
|
|
|
+ if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) {
|
|
|
+ for (BooleanClause booleanClause : highFreq) {
|
|
|
+ booleanClause.setOccur(Occur.MUST);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ highFreq.setBoost(getBoost());
|
|
|
+ return highFreq;
|
|
|
+ } else if (highFreq.clauses().isEmpty()) {
|
|
|
+ // only do low freq terms - we don't have high freq terms
|
|
|
+ lowFreq.setBoost(getBoost());
|
|
|
+ return lowFreq;
|
|
|
+ } else {
|
|
|
+ query.add(highFreq, Occur.SHOULD);
|
|
|
+ query.add(lowFreq, Occur.MUST);
|
|
|
+ query.setBoost(getBoost());
|
|
|
+ return query;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
static {
    // Trips (when assertions are enabled) as soon as Lucene.VERSION is newer than 4.7,
    // as a reminder to delete the methods marked LUCENE-UPGRADE in this class.
    assert Version.LUCENE_47.onOrAfter(Lucene.VERSION)
            : "Remove obsolete code after upgrade to lucene 4.8";
}
|
|
|
+
|
|
|
+ //@Override
|
|
|
+ // LUCENE-UPGRADE: remove this method if on 4.8
|
|
|
+ protected Query newTermQuery(Term term, TermContext context) {
|
|
|
+ if (mapper == null) {
|
|
|
+ // this should be super.newTermQuery(term, context) once it's available in the super class
|
|
|
+ return context == null ? new TermQuery(term) : new TermQuery(term, context);
|
|
|
+ }
|
|
|
+ final Query query = mapper.queryStringTermQuery(term);
|
|
|
+ if (query == null) {
|
|
|
+ // this should be super.newTermQuery(term, context) once it's available in the super class
|
|
|
+ return context == null ? new TermQuery(term) : new TermQuery(term, context);
|
|
|
+ } else {
|
|
|
+ return query;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|