|
@@ -23,22 +23,19 @@ import org.apache.lucene.analysis.TokenStream;
|
|
|
import org.apache.lucene.index.IndexReader;
|
|
|
import org.apache.lucene.index.MultiFields;
|
|
|
import org.apache.lucene.index.Terms;
|
|
|
+import org.apache.lucene.search.IndexSearcher;
|
|
|
import org.apache.lucene.search.spell.DirectSpellChecker;
|
|
|
import org.apache.lucene.util.BytesRef;
|
|
|
import org.apache.lucene.util.BytesRefBuilder;
|
|
|
import org.apache.lucene.util.CharsRefBuilder;
|
|
|
-import org.elasticsearch.ElasticsearchException;
|
|
|
-import org.elasticsearch.action.search.MultiSearchRequestBuilder;
|
|
|
-import org.elasticsearch.action.search.MultiSearchResponse;
|
|
|
-import org.elasticsearch.action.search.SearchRequestBuilder;
|
|
|
-import org.elasticsearch.action.search.SearchResponse;
|
|
|
-import org.elasticsearch.client.Client;
|
|
|
import org.elasticsearch.common.bytes.BytesReference;
|
|
|
import org.elasticsearch.common.inject.Inject;
|
|
|
+import org.elasticsearch.common.lucene.Lucene;
|
|
|
+import org.elasticsearch.common.lucene.Lucene.EarlyTerminatingCollector;
|
|
|
import org.elasticsearch.common.text.StringText;
|
|
|
import org.elasticsearch.common.text.Text;
|
|
|
import org.elasticsearch.index.query.QueryBuilders;
|
|
|
-import org.elasticsearch.index.query.QueryBuilders;
|
|
|
+import org.elasticsearch.index.query.ParsedQuery;
|
|
|
import org.elasticsearch.script.CompiledScript;
|
|
|
import org.elasticsearch.script.ExecutableScript;
|
|
|
import org.elasticsearch.script.ScriptService;
|
|
@@ -58,12 +55,10 @@ import java.util.Map;
|
|
|
public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|
|
private final BytesRef SEPARATOR = new BytesRef(" ");
|
|
|
private static final String SUGGESTION_TEMPLATE_VAR_NAME = "suggestion";
|
|
|
- private final Client client;
|
|
|
private final ScriptService scriptService;
|
|
|
|
|
|
@Inject
|
|
|
- public PhraseSuggester(Client client, ScriptService scriptService) {
|
|
|
- this.client = client;
|
|
|
+ public PhraseSuggester(ScriptService scriptService) {
|
|
|
this.scriptService = scriptService;
|
|
|
}
|
|
|
|
|
@@ -76,11 +71,11 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|
|
* - phonetic filters could be interesting here too for candidate selection
|
|
|
*/
|
|
|
@Override
|
|
|
- public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
|
|
|
- IndexReader indexReader, CharsRefBuilder spare) throws IOException {
|
|
|
+ public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion, IndexSearcher searcher,
|
|
|
+ CharsRefBuilder spare) throws IOException {
|
|
|
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
|
|
|
final PhraseSuggestion response = new PhraseSuggestion(name, suggestion.getSize());
|
|
|
-
|
|
|
+ final IndexReader indexReader = searcher.getIndexReader();
|
|
|
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
|
|
|
final int numGenerators = generators.size();
|
|
|
final List<CandidateGenerator> gens = new ArrayList<>(generators.size());
|
|
@@ -103,31 +98,52 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|
|
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
|
|
|
Result checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
|
|
|
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
|
|
|
- suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
|
|
|
+ suggestion.getShardSize(), wordScorer, suggestion.confidence(), suggestion.gramSize());
|
|
|
|
|
|
PhraseSuggestion.Entry resultEntry = buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
|
|
|
response.addTerm(resultEntry);
|
|
|
|
|
|
- BytesRefBuilder byteSpare = new BytesRefBuilder();
|
|
|
-
|
|
|
- MultiSearchResponse multiSearchResponse = collate(suggestion, checkerResult, byteSpare, spare);
|
|
|
- final boolean collateEnabled = multiSearchResponse != null;
|
|
|
- final boolean collatePrune = suggestion.collatePrune();
|
|
|
-
|
|
|
+ final BytesRefBuilder byteSpare = new BytesRefBuilder();
|
|
|
+ final EarlyTerminatingCollector collector = Lucene.createExistsCollector();
|
|
|
+ final CompiledScript collateScript;
|
|
|
+ if (suggestion.getCollateQueryScript() != null) {
|
|
|
+ collateScript = suggestion.getCollateQueryScript();
|
|
|
+ } else if (suggestion.getCollateFilterScript() != null) {
|
|
|
+ collateScript = suggestion.getCollateFilterScript();
|
|
|
+ } else {
|
|
|
+ collateScript = null;
|
|
|
+ }
|
|
|
+ final boolean collatePrune = (collateScript != null) && suggestion.collatePrune();
|
|
|
for (int i = 0; i < checkerResult.corrections.length; i++) {
|
|
|
- boolean collateMatch = hasMatchingDocs(multiSearchResponse, i);
|
|
|
+ Correction correction = checkerResult.corrections[i];
|
|
|
+ spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, null, null));
|
|
|
+ boolean collateMatch = true;
|
|
|
+ if (collateScript != null) {
|
|
|
+ // Check whether the query rendered from collateScript matches any document
|
|
|
+ // in the index for this correction; the result is stored in collateMatch
|
|
|
+ final Map<String, Object> vars = suggestion.getCollateScriptParams();
|
|
|
+ vars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
|
|
|
+ final ExecutableScript executable = scriptService.executable(collateScript, vars);
|
|
|
+ final BytesReference querySource = (BytesReference) executable.run();
|
|
|
+ final ParsedQuery parsedQuery;
|
|
|
+ if (suggestion.getCollateFilterScript() != null) {
|
|
|
+ parsedQuery = suggestion.getQueryParserService().parse(
|
|
|
+ QueryBuilders.constantScoreQuery(QueryBuilders.wrapperQuery(querySource)));
|
|
|
+ } else {
|
|
|
+ parsedQuery = suggestion.getQueryParserService().parse(querySource);
|
|
|
+ }
|
|
|
+ collateMatch = Lucene.exists(searcher, parsedQuery.query(), collector);
|
|
|
+ }
|
|
|
if (!collateMatch && !collatePrune) {
|
|
|
continue;
|
|
|
}
|
|
|
- Correction correction = checkerResult.corrections[i];
|
|
|
- spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, null, null));
|
|
|
Text phrase = new StringText(spare.toString());
|
|
|
Text highlighted = null;
|
|
|
if (suggestion.getPreTag() != null) {
|
|
|
spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()));
|
|
|
highlighted = new StringText(spare.toString());
|
|
|
}
|
|
|
- if (collateEnabled && collatePrune) {
|
|
|
+ if (collatePrune) {
|
|
|
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score), collateMatch));
|
|
|
} else {
|
|
|
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
|
@@ -144,67 +160,6 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|
|
return new PhraseSuggestion.Entry(new StringText(spare.toString()), 0, spare.length(), cutoffScore);
|
|
|
}
|
|
|
|
|
|
- private MultiSearchResponse collate(PhraseSuggestionContext suggestion, Result checkerResult, BytesRefBuilder byteSpare, CharsRefBuilder spare) throws IOException {
|
|
|
- CompiledScript collateQueryScript = suggestion.getCollateQueryScript();
|
|
|
- CompiledScript collateFilterScript = suggestion.getCollateFilterScript();
|
|
|
- MultiSearchResponse multiSearchResponse = null;
|
|
|
- if (collateQueryScript != null) {
|
|
|
- multiSearchResponse = fetchMatchingDocCountResponses(checkerResult.corrections, collateQueryScript, false, suggestion, byteSpare, spare);
|
|
|
- } else if (collateFilterScript != null) {
|
|
|
- multiSearchResponse = fetchMatchingDocCountResponses(checkerResult.corrections, collateFilterScript, true, suggestion, byteSpare, spare);
|
|
|
- }
|
|
|
- return multiSearchResponse;
|
|
|
- }
|
|
|
-
|
|
|
- private MultiSearchResponse fetchMatchingDocCountResponses(Correction[] corrections, CompiledScript collateScript,
|
|
|
- boolean isFilter, PhraseSuggestionContext suggestions,
|
|
|
- BytesRefBuilder byteSpare, CharsRefBuilder spare) throws IOException {
|
|
|
- Map<String, Object> vars = suggestions.getCollateScriptParams();
|
|
|
- MultiSearchResponse multiSearchResponse = null;
|
|
|
- MultiSearchRequestBuilder multiSearchRequestBuilder = client.prepareMultiSearch();
|
|
|
- boolean requestAdded = false;
|
|
|
- SearchRequestBuilder req;
|
|
|
- for (Correction correction : corrections) {
|
|
|
- spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, null, null));
|
|
|
- vars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
|
|
|
- ExecutableScript executable = scriptService.executable(collateScript, vars);
|
|
|
- BytesReference querySource = (BytesReference) executable.run();
|
|
|
- requestAdded = true;
|
|
|
- if (isFilter) {
|
|
|
- req = client.prepareSearch()
|
|
|
- .setPreference(suggestions.getPreference())
|
|
|
- .setQuery(QueryBuilders.constantScoreQuery(QueryBuilders.wrapperQuery(querySource)))
|
|
|
- .setSize(0)
|
|
|
- .setTerminateAfter(1);
|
|
|
- } else {
|
|
|
- req = client.prepareSearch()
|
|
|
- .setPreference(suggestions.getPreference())
|
|
|
- .setQuery(querySource)
|
|
|
- .setSize(0)
|
|
|
- .setTerminateAfter(1);
|
|
|
- }
|
|
|
- multiSearchRequestBuilder.add(req);
|
|
|
- }
|
|
|
- if (requestAdded) {
|
|
|
- multiSearchResponse = multiSearchRequestBuilder.get();
|
|
|
- }
|
|
|
-
|
|
|
- return multiSearchResponse;
|
|
|
- }
|
|
|
-
|
|
|
- private static boolean hasMatchingDocs(MultiSearchResponse multiSearchResponse, int index) {
|
|
|
- if (multiSearchResponse == null) {
|
|
|
- return true;
|
|
|
- }
|
|
|
- MultiSearchResponse.Item item = multiSearchResponse.getResponses()[index];
|
|
|
- if (!item.isFailure()) {
|
|
|
- SearchResponse resp = item.getResponse();
|
|
|
- return resp.getHits().totalHits() > 0;
|
|
|
- } else {
|
|
|
- throw new ElasticsearchException("Collate request failed: " + item.getFailureMessage());
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
ScriptService scriptService() {
|
|
|
return scriptService;
|
|
|
}
|