Преглед изворног кода

Merge pull request #14042 from jpountz/cleanup/Lucene_exists

Clean up the Lucene utility class.
Adrien Grand пре 10 година
родитељ
комит
8c10856f7a

+ 6 - 1
core/src/main/java/org/elasticsearch/action/exists/TransportExistsAction.java

@@ -174,7 +174,12 @@ public class TransportExistsAction extends TransportBroadcastAction<ExistsReques
             }
             context.preProcess();
             try {
-                boolean exists = Lucene.exists(context, context.query(), Lucene.createExistsCollector());
+                boolean exists;
+                try {
+                    exists = Lucene.exists(context.searcher(), context.query());
+                } finally {
+                    context.clearReleasables(SearchContext.Lifetime.COLLECTION);
+                }
                 return new ShardExistsResponse(request.shardId(), exists);
             } catch (Exception e) {
                 throw new QueryPhaseExecutionException(context, "failed to execute exists", e);

+ 16 - 120
core/src/main/java/org/elasticsearch/common/lucene/Lucene.java

@@ -46,14 +46,11 @@ import org.elasticsearch.common.util.iterable.Iterables;
 import org.elasticsearch.index.analysis.AnalyzerScope;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.IndexFieldData;
-import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
 import java.text.ParseException;
 import java.util.*;
 
-import static org.elasticsearch.common.lucene.search.NoopCollector.NOOP_COLLECTOR;
-
 /**
  *
  */
@@ -229,27 +226,6 @@ public class Lucene {
         }.run();
     }
 
-    public static long count(IndexSearcher searcher, Query query) throws IOException {
-        return searcher.count(query);
-    }
-
-    /**
-     * Performs a count on the <code>searcher</code> for <code>query</code>. Terminates
-     * early when the count has reached <code>terminateAfter</code>
-     */
-    public static long count(IndexSearcher searcher, Query query, int terminateAfterCount) throws IOException {
-        EarlyTerminatingCollector countCollector = createCountBasedEarlyTerminatingCollector(terminateAfterCount);
-        countWithEarlyTermination(searcher, query, countCollector);
-        return countCollector.count();
-    }
-
-    /**
-     * Creates count based early termination collector with a threshold of <code>maxCountHits</code>
-     */
-    public final static EarlyTerminatingCollector createCountBasedEarlyTerminatingCollector(int maxCountHits) {
-        return new EarlyTerminatingCollector(maxCountHits);
-    }
-
     /**
      * Wraps <code>delegate</code> with count based early termination collector with a threshold of <code>maxCountHits</code>
      */
@@ -265,99 +241,27 @@ public class Lucene {
     }
 
     /**
-     * Performs an exists (count &gt; 0) query on the <code>searcher</code> for <code>query</code>
-     * with <code>filter</code> using the given <code>collector</code>
-     *
-     * The <code>collector</code> can be instantiated using <code>Lucene.createExistsCollector()</code>
+     * Check whether at least one live (non-deleted) document matches the provided query.
      */
-    public static boolean exists(IndexSearcher searcher, Query query, Filter filter,
-                                 EarlyTerminatingCollector collector) throws IOException {
-        collector.reset();
-        countWithEarlyTermination(searcher, filter, query, collector);
-        return collector.exists();
-    }
-
-
-    /**
-     * Performs an exists (count &gt; 0) query on the <code>searcher</code> for <code>query</code>
-     * using the given <code>collector</code>
-     *
-     * The <code>collector</code> can be instantiated using <code>Lucene.createExistsCollector()</code>
-     */
-    public static boolean exists(IndexSearcher searcher, Query query, EarlyTerminatingCollector collector) throws IOException {
-        collector.reset();
-        countWithEarlyTermination(searcher, query, collector);
-        return collector.exists();
-    }
-
-    /**
-     * Calls <code>countWithEarlyTermination(searcher, null, query, collector)</code>
-     */
-    public static boolean countWithEarlyTermination(IndexSearcher searcher, Query query,
-                                                  EarlyTerminatingCollector collector) throws IOException {
-        return countWithEarlyTermination(searcher, null, query, collector);
-    }
-
-    /**
-     * Performs a count on <code>query</code> and <code>filter</code> with early termination using <code>searcher</code>.
-     * The early termination threshold is specified by the provided <code>collector</code>
-     */
-    public static boolean countWithEarlyTermination(IndexSearcher searcher, Filter filter, Query query,
-                                                        EarlyTerminatingCollector collector) throws IOException {
-        try {
-            if (filter == null) {
-                searcher.search(query, collector);
-            } else {
-                searcher.search(query, filter, collector);
+    public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
+        final Weight weight = searcher.createNormalizedWeight(query, false);
+        // the scorer API should be more efficient at stopping after the first
+        // match than the bulk scorer API
+        for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
+            final Scorer scorer = weight.scorer(context);
+            if (scorer == null) {
+                continue;
+            }
+            final Bits liveDocs = context.reader().getLiveDocs();
+            for (int doc = scorer.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = scorer.nextDoc()) {
+                if (liveDocs == null || liveDocs.get(doc)) {
+                    return true;
+                }
             }
-        } catch (EarlyTerminationException e) {
-            // early termination
-            return true;
         }
         return false;
     }
 
-    /**
-     * Performs an exists (count &gt; 0) query on the searcher from the <code>searchContext</code> for <code>query</code>
-     * using the given <code>collector</code>
-     *
-     * The <code>collector</code> can be instantiated using <code>Lucene.createExistsCollector()</code>
-     */
-    public static boolean exists(SearchContext searchContext, Query query, EarlyTerminatingCollector collector) throws IOException {
-        collector.reset();
-        try {
-            searchContext.searcher().search(query, collector);
-        } catch (EarlyTerminationException e) {
-            // ignore, just early termination...
-        } finally {
-            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
-        }
-        return collector.exists();
-    }
-
-    /**
-     * Creates an {@link org.elasticsearch.common.lucene.Lucene.EarlyTerminatingCollector}
-     * with a threshold of <code>1</code>
-     */
-    public final static EarlyTerminatingCollector createExistsCollector() {
-        return createCountBasedEarlyTerminatingCollector(1);
-    }
-
-    /**
-     * Closes the index writer, returning <tt>false</tt> if it failed to close.
-     */
-    public static boolean safeClose(IndexWriter writer) {
-        if (writer == null) {
-            return true;
-        }
-        try {
-            writer.close();
-            return true;
-        } catch (Throwable e) {
-            return false;
-        }
-    }
-
     public static TopDocs readTopDocs(StreamInput in) throws IOException {
         if (in.readBoolean()) {
             int totalHits = in.readVInt();
@@ -612,19 +516,11 @@ public class Lucene {
         private int count = 0;
         private LeafCollector leafCollector;
 
-        EarlyTerminatingCollector(int maxCountHits) {
-            this.maxCountHits = maxCountHits;
-            this.delegate = NOOP_COLLECTOR;
-        }
-
         EarlyTerminatingCollector(final Collector delegate, int maxCountHits) {
             this.maxCountHits = maxCountHits;
-            this.delegate = (delegate == null) ? NOOP_COLLECTOR : delegate;
+            this.delegate = Objects.requireNonNull(delegate);
         }
 
-        public void reset() {
-            count = 0;
-        }
         public int count() {
             return count;
         }

+ 0 - 51
core/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java

@@ -1,51 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.common.lucene.search;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.SimpleCollector;
-
-import java.io.IOException;
-
-/**
- *
- */
-public class NoopCollector extends SimpleCollector {
-
-    public static final NoopCollector NOOP_COLLECTOR = new NoopCollector();
-
-    @Override
-    public void setScorer(Scorer scorer) throws IOException {
-    }
-
-    @Override
-    public void collect(int doc) throws IOException {
-    }
-
-    @Override
-    protected void doSetNextReader(LeafReaderContext context) throws IOException {
-    }
-
-    @Override
-    public boolean needsScores() {
-        return false;
-    }
-}

+ 24 - 23
core/src/main/java/org/elasticsearch/percolator/PercolatorService.java

@@ -25,6 +25,7 @@ import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.memory.ExtendedMemoryIndex;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.MatchAllDocsQuery;
@@ -457,22 +458,22 @@ public class PercolatorService extends AbstractComponent {
         @Override
         public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
             long count = 0;
-            Lucene.EarlyTerminatingCollector collector = Lucene.createExistsCollector();
             for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
                 try {
+                    Query existsQuery = entry.getValue();
                     if (isNested) {
-                        Lucene.exists(context.docSearcher(), entry.getValue(), Queries.newNonNestedFilter(), collector);
-                    } else {
-                        Lucene.exists(context.docSearcher(), entry.getValue(), collector);
+                        existsQuery = new BooleanQuery.Builder()
+                            .add(existsQuery, Occur.MUST)
+                            .add(Queries.newNonNestedFilter(), Occur.FILTER)
+                            .build();
+                    }
+                    if (Lucene.exists(context.docSearcher(), existsQuery)) {
+                        count ++;
                     }
                 } catch (Throwable e) {
                     logger.debug("[" + entry.getKey() + "] failed to execute query", e);
                     throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
                 }
-
-                if (collector.exists()) {
-                    count++;
-                }
             }
             return new PercolateShardResponse(count, context, request.shardId());
         }
@@ -552,7 +553,6 @@ public class PercolatorService extends AbstractComponent {
             long count = 0;
             List<BytesRef> matches = new ArrayList<>();
             List<Map<String, HighlightField>> hls = new ArrayList<>();
-            Lucene.EarlyTerminatingCollector collector = Lucene.createExistsCollector();
 
             for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
                 if (context.highlight() != null) {
@@ -560,26 +560,27 @@ public class PercolatorService extends AbstractComponent {
                     context.hitContext().cache().clear();
                 }
                 try {
+                    Query existsQuery = entry.getValue();
                     if (isNested) {
-                        Lucene.exists(context.docSearcher(), entry.getValue(), Queries.newNonNestedFilter(), collector);
-                    } else {
-                        Lucene.exists(context.docSearcher(), entry.getValue(), collector);
+                        existsQuery = new BooleanQuery.Builder()
+                            .add(existsQuery, Occur.MUST)
+                            .add(Queries.newNonNestedFilter(), Occur.FILTER)
+                            .build();
+                    }
+                    if (Lucene.exists(context.docSearcher(), existsQuery)) {
+                        if (!context.limit || count < context.size()) {
+                            matches.add(entry.getKey());
+                            if (context.highlight() != null) {
+                                highlightPhase.hitExecute(context, context.hitContext());
+                                hls.add(context.hitContext().hit().getHighlightFields());
+                            }
+                        }
+                        count++;
                     }
                 } catch (Throwable e) {
                     logger.debug("[" + entry.getKey() + "] failed to execute query", e);
                     throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
                 }
-
-                if (collector.exists()) {
-                    if (!context.limit || count < context.size()) {
-                        matches.add(entry.getKey());
-                        if (context.highlight() != null) {
-                            highlightPhase.hitExecute(context, context.hitContext());
-                            hls.add(context.hitContext().hit().getHighlightFields());
-                        }
-                    }
-                    count++;
-                }
             }
 
             BytesRef[] finalMatches = matches.toArray(new BytesRef[matches.size()]);

+ 34 - 25
core/src/main/java/org/elasticsearch/percolator/QueryCollector.java

@@ -19,8 +19,10 @@
 package org.elasticsearch.percolator;
 
 import com.carrotsearch.hppc.FloatArrayList;
+
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.lucene.Lucene;
@@ -54,7 +56,6 @@ abstract class QueryCollector extends SimpleCollector {
     final ESLogger logger;
     boolean isNestedDoc = false;
 
-    final Lucene.EarlyTerminatingCollector collector = Lucene.createExistsCollector();
     BytesRef current;
 
     SortedBinaryDocValues values;
@@ -166,6 +167,13 @@ abstract class QueryCollector extends SimpleCollector {
                 // log???
                 return;
             }
+            Query existsQuery = query;
+            if (isNestedDoc) {
+                existsQuery = new BooleanQuery.Builder()
+                    .add(existsQuery, Occur.MUST)
+                    .add(Queries.newNonNestedFilter(), Occur.FILTER)
+                    .build();
+            }
             // run the query
             try {
                 if (context.highlight() != null) {
@@ -173,12 +181,7 @@ abstract class QueryCollector extends SimpleCollector {
                     context.hitContext().cache().clear();
                 }
 
-                if (isNestedDoc) {
-                    Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
-                } else {
-                    Lucene.exists(searcher, query, collector);
-                }
-                if (collector.exists()) {
+                if (Lucene.exists(searcher, existsQuery)) {
                     if (!limit || counter < size) {
                         matches.add(BytesRef.deepCopyOf(current));
                         if (context.highlight() != null) {
@@ -230,14 +233,16 @@ abstract class QueryCollector extends SimpleCollector {
                 // log???
                 return;
             }
+            Query existsQuery = query;
+            if (isNestedDoc) {
+                existsQuery = new BooleanQuery.Builder()
+                    .add(existsQuery, Occur.MUST)
+                    .add(Queries.newNonNestedFilter(), Occur.FILTER)
+                    .build();
+            }
             // run the query
             try {
-                if (isNestedDoc) {
-                    Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
-                } else {
-                    Lucene.exists(searcher, query, collector);
-                }
-                if (collector.exists()) {
+                if (Lucene.exists(searcher, existsQuery)) {
                     topDocsLeafCollector.collect(doc);
                     postMatch(doc);
                 }
@@ -298,18 +303,20 @@ abstract class QueryCollector extends SimpleCollector {
                 // log???
                 return;
             }
+            Query existsQuery = query;
+            if (isNestedDoc) {
+                existsQuery = new BooleanQuery.Builder()
+                    .add(existsQuery, Occur.MUST)
+                    .add(Queries.newNonNestedFilter(), Occur.FILTER)
+                    .build();
+            }
             // run the query
             try {
                 if (context.highlight() != null) {
                     context.parsedQuery(new ParsedQuery(query));
                     context.hitContext().cache().clear();
                 }
-                if (isNestedDoc) {
-                    Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
-                } else {
-                    Lucene.exists(searcher, query, collector);
-                }
-                if (collector.exists()) {
+                if (Lucene.exists(searcher, existsQuery)) {
                     if (!limit || counter < size) {
                         matches.add(BytesRef.deepCopyOf(current));
                         scores.add(scorer.score());
@@ -363,14 +370,16 @@ abstract class QueryCollector extends SimpleCollector {
                 // log???
                 return;
             }
+            Query existsQuery = query;
+            if (isNestedDoc) {
+                existsQuery = new BooleanQuery.Builder()
+                    .add(existsQuery, Occur.MUST)
+                    .add(Queries.newNonNestedFilter(), Occur.FILTER)
+                    .build();
+            }
             // run the query
             try {
-                if (isNestedDoc) {
-                    Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
-                } else {
-                    Lucene.exists(searcher, query, collector);
-                }
-                if (collector.exists()) {
+                if (Lucene.exists(searcher, existsQuery)) {
                     counter++;
                     postMatch(doc);
                 }

+ 1 - 2
core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java

@@ -104,7 +104,6 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
             response.addTerm(resultEntry);
 
             final BytesRefBuilder byteSpare = new BytesRefBuilder();
-            final EarlyTerminatingCollector collector = Lucene.createExistsCollector();
             final CompiledScript collateScript = suggestion.getCollateQueryScript();
             final boolean collatePrune = (collateScript != null) && suggestion.collatePrune();
             for (int i = 0; i < checkerResult.corrections.length; i++) {
@@ -119,7 +118,7 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
                     final ExecutableScript executable = scriptService.executable(collateScript, vars);
                     final BytesReference querySource = (BytesReference) executable.run();
                     final ParsedQuery parsedQuery = suggestion.getQueryParserService().parse(querySource);
-                    collateMatch = Lucene.exists(searcher, parsedQuery.query(), collector);
+                    collateMatch = Lucene.exists(searcher, parsedQuery.query());
                 }
                 if (!collateMatch && !collatePrune) {
                     continue;

+ 37 - 0
core/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java

@@ -20,10 +20,14 @@ package org.elasticsearch.common.lucene;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.Version;
 import org.elasticsearch.test.ESTestCase;
@@ -322,4 +326,37 @@ public class LuceneTests extends ESTestCase {
         writer.close();
         dir.close();
     }
+
+    public void testCount() throws Exception {
+        Directory dir = newDirectory();
+        RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir);
+
+        try (DirectoryReader reader = w.getReader()) {
+            // match_all does not match anything on an empty index
+            IndexSearcher searcher = newSearcher(reader);
+            assertFalse(Lucene.exists(searcher, new MatchAllDocsQuery()));
+        }
+
+        Document doc = new Document();
+        w.addDocument(doc);
+
+        doc.add(new StringField("foo", "bar", Store.NO));
+        w.addDocument(doc);
+
+        try (DirectoryReader reader = w.getReader()) {
+            IndexSearcher searcher = newSearcher(reader);
+            assertTrue(Lucene.exists(searcher, new MatchAllDocsQuery()));
+            assertFalse(Lucene.exists(searcher, new TermQuery(new Term("baz", "bar"))));
+            assertTrue(Lucene.exists(searcher, new TermQuery(new Term("foo", "bar"))));
+        }
+
+        w.deleteDocuments(new Term("foo", "bar"));
+        try (DirectoryReader reader = w.getReader()) {
+            IndexSearcher searcher = newSearcher(reader);
+            assertFalse(Lucene.exists(searcher, new TermQuery(new Term("foo", "bar"))));
+        }
+
+        w.close();
+        dir.close();
+    }
 }

+ 4 - 4
core/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java

@@ -44,23 +44,23 @@ public class MultiPhrasePrefixQueryTests extends ESTestCase {
 
         MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
         query.add(new Term("field", "aa"));
-        assertThat(Lucene.count(searcher, query), equalTo(1l));
+        assertThat(searcher.count(query), equalTo(1));
 
         query = new MultiPhrasePrefixQuery();
         query.add(new Term("field", "aaa"));
         query.add(new Term("field", "bb"));
-        assertThat(Lucene.count(searcher, query), equalTo(1l));
+        assertThat(searcher.count(query), equalTo(1));
 
         query = new MultiPhrasePrefixQuery();
         query.setSlop(1);
         query.add(new Term("field", "aaa"));
         query.add(new Term("field", "cc"));
-        assertThat(Lucene.count(searcher, query), equalTo(1l));
+        assertThat(searcher.count(query), equalTo(1));
 
         query = new MultiPhrasePrefixQuery();
         query.setSlop(1);
         query.add(new Term("field", "xxx"));
-        assertThat(Lucene.count(searcher, query), equalTo(0l));
+        assertThat(searcher.count(query), equalTo(0));
     }
 
     @Test

+ 1 - 1
core/src/test/java/org/elasticsearch/common/lucene/search/morelikethis/MoreLikeThisQueryTests.java

@@ -65,7 +65,7 @@ public class MoreLikeThisQueryTests extends ESTestCase {
         mltQuery.setLikeText("lucene");
         mltQuery.setMinTermFrequency(1);
         mltQuery.setMinDocFreq(1);
-        long count = Lucene.count(searcher, mltQuery);
+        long count = searcher.count(mltQuery);
         assertThat(count, equalTo(2l));
 
         reader.close();

+ 1 - 2
core/src/test/java/org/elasticsearch/index/engine/EngineSearcherTotalHitsMatcher.java

@@ -20,7 +20,6 @@
 package org.elasticsearch.index.engine;
 
 import org.apache.lucene.search.Query;
-import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.hamcrest.Description;
 import org.hamcrest.Matcher;
@@ -46,7 +45,7 @@ public final class EngineSearcherTotalHitsMatcher extends TypeSafeMatcher<Engine
     @Override
     public boolean matchesSafely(Engine.Searcher searcher) {
         try {
-            this.count = (int) Lucene.count(searcher.searcher(), query);
+            this.count = (int) searcher.searcher().count(query);
             return count == totalHits;
         } catch (IOException e) {
             return false;