
Upgrade to Lucene 4.8

Closes #5932
Robert Muir · 11 years ago
commit 8e0a479316
79 changed files with 768 additions and 794 deletions
  1. core-signatures.txt (+0 -6)
  2. docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc (+7 -15)
  3. pom.xml (+1 -2)
  4. src/main/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java (+0 -56)
  5. src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java (+1 -1)
  6. src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java (+3 -85)
  7. src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java (+12 -12)
  8. src/main/java/org/elasticsearch/Version.java (+2 -2)
  9. src/main/java/org/elasticsearch/common/io/FileSystemUtils.java (+27 -49)
  10. src/main/java/org/elasticsearch/common/lucene/Directories.java (+2 -1)
  11. src/main/java/org/elasticsearch/common/lucene/Lucene.java (+4 -1)
  12. src/main/java/org/elasticsearch/common/lucene/SegmentReaderUtils.java (+1 -24)
  13. src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java (+2 -3)
  14. src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java (+1 -1)
  15. src/main/java/org/elasticsearch/common/lucene/search/XFilteredQuery.java (+5 -5)
  16. src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java (+2 -2)
  17. src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java (+2 -2)
  18. src/main/java/org/elasticsearch/common/lucene/store/ChecksumIndexOutput.java (+0 -97)
  19. src/main/java/org/elasticsearch/common/util/CollectionUtils.java (+55 -5)
  20. src/main/java/org/elasticsearch/env/NodeEnvironment.java (+24 -1)
  21. src/main/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactory.java (+10 -14)
  22. src/main/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactory.java (+13 -4)
  23. src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java (+19 -6)
  24. src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java (+4 -5)
  25. src/main/java/org/elasticsearch/index/fielddata/plain/BinaryDVAtomicFieldData.java (+2 -1)
  26. src/main/java/org/elasticsearch/index/fielddata/plain/BinaryDVNumericAtomicFieldData.java (+2 -1)
  27. src/main/java/org/elasticsearch/index/fielddata/plain/BytesBinaryDVAtomicFieldData.java (+2 -1)
  28. src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVAtomicFieldData.java (+2 -1)
  29. src/main/java/org/elasticsearch/index/fielddata/plain/NumericDVAtomicFieldData.java (+2 -1)
  30. src/main/java/org/elasticsearch/index/fielddata/plain/SortedSetDVAtomicFieldData.java (+2 -4)
  31. src/main/java/org/elasticsearch/index/merge/EnableMergeScheduler.java (+5 -26)
  32. src/main/java/org/elasticsearch/index/merge/Merges.java (+0 -107)
  33. src/main/java/org/elasticsearch/index/merge/scheduler/SerialMergeSchedulerProvider.java (+2 -2)
  34. src/main/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQuery.java (+1 -1)
  35. src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java (+1 -1)
  36. src/main/java/org/elasticsearch/index/search/child/CustomQueryWrappingFilter.java (+1 -1)
  37. src/main/java/org/elasticsearch/index/search/child/DeleteByQueryWrappingFilter.java (+1 -1)
  38. src/main/java/org/elasticsearch/index/search/child/ParentConstantScoreQuery.java (+1 -1)
  39. src/main/java/org/elasticsearch/index/search/child/ParentQuery.java (+1 -1)
  40. src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java (+1 -1)
  41. src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java (+2 -2)
  42. src/main/java/org/elasticsearch/index/store/Store.java (+14 -5)
  43. src/main/java/org/elasticsearch/indices/analysis/HunspellService.java (+13 -23)
  44. src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisModule.java (+5 -5)
  45. src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java (+15 -6)
  46. src/main/java/org/elasticsearch/indices/recovery/RecoveryTarget.java (+1 -1)
  47. src/main/java/org/elasticsearch/monitor/dump/AbstractDump.java (+0 -1)
  48. src/main/java/org/elasticsearch/monitor/dump/SimpleDump.java (+1 -2)
  49. src/main/java/org/elasticsearch/monitor/dump/SimpleDumpGenerator.java (+1 -7)
  50. src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java (+3 -2)
  51. src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java (+20 -52)
  52. src/main/java/org/elasticsearch/search/facet/terms/strings/HashedAggregator.java (+7 -6)
  53. src/main/java/org/elasticsearch/search/scan/ScanContext.java (+1 -1)
  54. src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java (+18 -8)
  55. src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java (+13 -3)
  56. src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java (+6 -3)
  57. src/test/java/org/apache/lucene/TrackingSerialMergeSchedulerTests.java (+2 -3)
  58. src/test/java/org/apache/lucene/queries/BlendedTermQueryTest.java (+4 -4)
  59. src/test/java/org/elasticsearch/benchmark/fielddata/LongFieldDataBenchmark.java (+1 -2)
  60. src/test/java/org/elasticsearch/common/lucene/uid/VersionsTests.java (+1 -2)
  61. src/test/java/org/elasticsearch/common/util/BytesRefHashTests.java (+5 -5)
  62. src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java (+70 -3)
  63. src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java (+220 -0)
  64. src/test/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactoryTests.java (+0 -43)
  65. src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java (+32 -0)
  66. src/test/java/org/elasticsearch/index/codec/postingformat/DefaultPostingsFormatTests.java (+1 -2)
  67. src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java (+2 -2)
  68. src/test/java/org/elasticsearch/index/engine/internal/InternalEngineTests.java (+5 -10)
  69. src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java (+5 -5)
  70. src/test/java/org/elasticsearch/index/fielddata/FilterFieldDataTest.java (+3 -4)
  71. src/test/java/org/elasticsearch/index/fielddata/LongFieldDataTests.java (+1 -2)
  72. src/test/java/org/elasticsearch/indices/analyze/HunspellServiceTests.java (+17 -16)
  73. src/test/java/org/elasticsearch/indices/leaks/IndicesLeaksTests.java (+1 -1)
  74. src/test/java/org/elasticsearch/search/aggregations/support/FieldDataSourceTests.java (+29 -2)
  75. src/test/java/org/elasticsearch/search/child/SimpleChildQuerySearchTests.java (+0 -5)
  76. src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java (+1 -1)
  77. src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java (+2 -2)
  78. src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java (+22 -2)
  79. src/test/java/org/elasticsearch/test/TestCluster.java (+1 -1)

+ 0 - 6
core-signatures.txt

@@ -25,12 +25,6 @@ org.apache.lucene.index.IndexReader#decRef()
 org.apache.lucene.index.IndexReader#incRef()
 org.apache.lucene.index.IndexReader#tryIncRef()
 
-org.apache.lucene.index.IndexWriter#maybeMerge() @ use Merges#maybeMerge
-org.apache.lucene.index.IndexWriter#forceMerge(int) @ use Merges#forceMerge
-org.apache.lucene.index.IndexWriter#forceMerge(int,boolean) @ use Merges#forceMerge
-org.apache.lucene.index.IndexWriter#forceMergeDeletes() @ use Merges#forceMergeDeletes
-org.apache.lucene.index.IndexWriter#forceMergeDeletes(boolean) @ use Merges#forceMergeDeletes
-
 @defaultMessage QueryWrapperFilter is cachable by default - use Queries#wrap instead
 org.apache.lucene.search.QueryWrapperFilter#<init>(org.apache.lucene.search.Query)
 

+ 7 - 15
docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc

@@ -23,26 +23,19 @@ The location of the hunspell directory can be configured using the
 `indices.analysis.hunspell.dictionary.location` settings in
 _elasticsearch.yml_.
 
-Each dictionary can be configured with two settings:
+Each dictionary can be configured with one setting:
 
 `ignore_case`:: 
     If true, dictionary matching will be case insensitive
     (defaults to `false`)
 
-`strict_affix_parsing`::
-    Determines whether errors while reading a
-    affix rules file will cause exception or simple be ignored (defaults to
-    `true`)
+This setting can be configured globally in `elasticsearch.yml` using
 
-These settings can be configured globally in `elasticsearch.yml` using
-
-* `indices.analysis.hunspell.dictionary.ignore_case` and
-* `indices.analysis.hunspell.dictionary.strict_affix_parsing`
+* `indices.analysis.hunspell.dictionary.ignore_case`
 
 or for specific dictionaries:
 
-* `indices.analysis.hunspell.dictionary.en_US.ignore_case` and
-* `indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing`.
+* `indices.analysis.hunspell.dictionary.en_US.ignore_case`.
 
 It is also possible to add `settings.yml` file under the dictionary
 directory which holds these settings (this will override any other
@@ -87,10 +80,9 @@ The hunspell token filter accepts four options:
     If only unique terms should be returned, this needs to be
     set to `true`. Defaults to `true`.
 
-`recursion_level`:: 
-    Configures the recursion level a
-    stemmer can go into. Defaults to `2`. Some languages (for example czech)
-    give better results when set to `1` or `0`, so you should test it out.
+`longest_only`:: 
+    If only the longest term should be returned, set this to `true`.
+    Defaults to `false`: all possible stems are returned.
 
 NOTE: As opposed to the snowball stemmers (which are algorithm based)
 this is a dictionary lookup based stemmer and therefore the quality of
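For reference, a minimal sketch of the Lucene 4.8 hunspell API that backs these settings. It assumes the two-InputStream Dictionary constructor; the dictionary file names are hypothetical. `dedup` and `longest_only` map to the filter's `dedup` and `longestOnly` flags.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

import java.io.FileInputStream;
import java.io.StringReader;

public class HunspellStemExample {
    public static void main(String[] args) throws Exception {
        // en_US.aff / en_US.dic are hypothetical paths to a hunspell dictionary.
        Dictionary dictionary;
        try (FileInputStream affix = new FileInputStream("en_US.aff");
             FileInputStream words = new FileInputStream("en_US.dic")) {
            dictionary = new Dictionary(affix, words);
        }
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_48, new StringReader("abilities"));
        // dedup=true drops duplicate stems; longestOnly=true would emit only the longest stem.
        ts = new HunspellStemFilter(ts, dictionary, /*dedup=*/ true, /*longestOnly=*/ false);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
        ts.close();
    }
}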

+ 1 - 2
pom.xml

@@ -31,7 +31,7 @@
     </parent>
 
     <properties>
-        <lucene.version>4.7.2</lucene.version>
+        <lucene.version>4.8.0</lucene.version>
         <tests.jvms>auto</tests.jvms>
         <tests.shuffle>true</tests.shuffle>
         <tests.output>onerror</tests.output>
@@ -1059,7 +1059,6 @@
                                 <exclude>org/elasticsearch/plugins/PluginManager.class</exclude>
                                 <exclude>org/elasticsearch/bootstrap/Bootstrap.class</exclude>
                                 <exclude>org/elasticsearch/Version.class</exclude>
-                                <exclude>org/elasticsearch/index/merge/Merges.class</exclude>
                                 <exclude>org/elasticsearch/common/lucene/search/Queries$QueryWrapperFilterFactory.class</exclude>
                                 <!-- end excludes for valid system-out -->
                                 <!-- start excludes for Unsafe -->

+ 0 - 56
src/main/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java

@@ -1,56 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.lucene.analysis.miscellaneous;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-import java.io.IOException;
-
-/**
- * A token filter that truncates tokens.
- */
-public class TruncateTokenFilter extends TokenFilter {
-
-    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
-
-    private final int size;
-
-    public TruncateTokenFilter(TokenStream in, int size) {
-        super(in);
-        this.size = size;
-    }
-
-    @Override
-    public final boolean incrementToken() throws IOException {
-        if (input.incrementToken()) {
-            final int length = termAttribute.length();
-            if (length > size) {
-                termAttribute.setLength(size);
-            }
-            return true;
-        } else {
-            return false;
-        }
-    }
-}
-
-
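The in-house copy above can be deleted presumably because Lucene 4.8 ships its own TruncateTokenFilter under the same package and name. A hedged sketch of the drop-in replacement, assuming the bundled filter keeps the (TokenStream, int) constructor of the deleted class:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;

class TruncateExample {
    // Assumption: Lucene 4.8's bundled filter takes (TokenStream, int)
    // exactly like the Elasticsearch copy it replaces.
    static TokenStream truncateTo(TokenStream in, int length) {
        return new TruncateTokenFilter(in, length); // cap each term at `length` chars
    }
}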

+ 1 - 1
src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java

@@ -91,7 +91,7 @@ public class TrackingSerialMergeScheduler extends MergeScheduler {
      * multiple threads, only one merge may run at a time.
      */
     @Override
-    synchronized public void merge(IndexWriter writer) throws CorruptIndexException, IOException {
+    synchronized public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws CorruptIndexException, IOException {
         int cycle = 0;
         while (cycle++ < maxMergeAtOnce) {
             MergePolicy.OneMerge merge = writer.getNextMerge();
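The signature change tracks Lucene 4.8's MergeScheduler contract, which now receives the MergeTrigger and a newMergesFound flag. A minimal scheduler against that contract, as a sketch (not the Elasticsearch implementation); it lives in org.apache.lucene.index because IndexWriter.getNextMerge() is package-private, which is also why TrackingSerialMergeScheduler does:

package org.apache.lucene.index; // getNextMerge() is package-private

import java.io.IOException;

public class MinimalSerialMergeScheduler extends MergeScheduler {

    @Override
    public synchronized void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException {
        MergePolicy.OneMerge merge;
        while ((merge = writer.getNextMerge()) != null) {
            writer.merge(merge); // run pending merges one at a time, on this thread
        }
    }

    @Override
    public void close() throws IOException {
        // nothing to release
    }
}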

+ 3 - 85
src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java

@@ -22,13 +22,8 @@ package org.apache.lucene.queries;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.util.Version;
-import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.index.mapper.FieldMapper;
 
@@ -84,91 +79,14 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery {
         return lowFreqMinNumShouldMatchSpec;
     }
 
-    // LUCENE-UPGRADE: remove this method if on 4.8
     @Override
-    public Query rewrite(IndexReader reader) throws IOException {
-        if (this.terms.isEmpty()) {
-            return new BooleanQuery();
-        } else if (this.terms.size() == 1) {
-            final Query tq = newTermQuery(this.terms.get(0), null);
-            tq.setBoost(getBoost());
-            return tq;
-        }
-        return super.rewrite(reader);
-    }
-
-    // LUCENE-UPGRADE: remove this method if on 4.8
-    @Override
-    protected Query buildQuery(final int maxDoc,
-                               final TermContext[] contextArray, final Term[] queryTerms) {
-        BooleanQuery lowFreq = new BooleanQuery(disableCoord);
-        BooleanQuery highFreq = new BooleanQuery(disableCoord);
-        highFreq.setBoost(highFreqBoost);
-        lowFreq.setBoost(lowFreqBoost);
-        BooleanQuery query = new BooleanQuery(true);
-        for (int i = 0; i < queryTerms.length; i++) {
-            TermContext termContext = contextArray[i];
-            if (termContext == null) {
-                lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
-            } else {
-                if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
-                        || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) {
-                    highFreq.add(newTermQuery(queryTerms[i], termContext), highFreqOccur);
-                } else {
-                    lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur);
-                }
-            }
-
-        }
-        final int numLowFreqClauses = lowFreq.clauses().size();
-        final int numHighFreqClauses = highFreq.clauses().size();
-        if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
-            int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
-            lowFreq.setMinimumNumberShouldMatch(minMustMatch);
-        }
-        if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
-            int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
-            highFreq.setMinimumNumberShouldMatch(minMustMatch);
-        }
-        if (lowFreq.clauses().isEmpty()) {
-          /*
-           * if lowFreq is empty we rewrite the high freq terms in a conjunction to
-           * prevent slow queries.
-           */
-            if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) {
-                for (BooleanClause booleanClause : highFreq) {
-                    booleanClause.setOccur(Occur.MUST);
-                }
-            }
-            highFreq.setBoost(getBoost());
-            return highFreq;
-        } else if (highFreq.clauses().isEmpty()) {
-            // only do low freq terms - we don't have high freq terms
-            lowFreq.setBoost(getBoost());
-            return lowFreq;
-        } else {
-            query.add(highFreq, Occur.SHOULD);
-            query.add(lowFreq, Occur.MUST);
-            query.setBoost(getBoost());
-            return query;
-        }
-    }
-
-    static {
-        assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "Remove obsolete code after upgrade to lucene 4.8";
-    }
-
-    //@Override
-    // LUCENE-UPGRADE: remove this method if on 4.8
     protected Query newTermQuery(Term term, TermContext context) {
         if (mapper == null) {
-            // this should be super.newTermQuery(term, context) once it's available in the super class
-            return context == null ? new TermQuery(term) : new TermQuery(term, context);
+            return super.newTermQuery(term, context);
         }
         final Query query = mapper.queryStringTermQuery(term);
         if (query == null) {
-            // this should be super.newTermQuery(term, context) once it's available in the super class
-            return context == null ? new TermQuery(term) : new TermQuery(term, context);
+            return super.newTermQuery(term, context);
         } else {
             return query;
         }
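All the copied rewrite/buildQuery code above becomes deletable because Lucene 4.8's CommonTermsQuery exposes newTermQuery(Term, TermContext) as a protected extension point. A sketch of the pattern in isolation; the subclass name and constructor arguments are illustrative only:

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Query;

public class CustomCommonTermsQuery extends CommonTermsQuery {

    public CustomCommonTermsQuery() {
        // high-frequency terms become SHOULD clauses, low-frequency MUST;
        // terms in more than 1% of docs count as high-frequency
        super(Occur.SHOULD, Occur.MUST, 0.01f);
    }

    @Override
    protected Query newTermQuery(Term term, TermContext context) {
        // per-term customization would hook in here; otherwise defer to Lucene
        return super.newTermQuery(term, context);
    }
}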

+ 12 - 12
src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java

@@ -24,14 +24,14 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.search.suggest.Sort;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.*;
 import org.apache.lucene.util.automaton.*;
 import org.apache.lucene.util.fst.*;
 import org.apache.lucene.util.fst.FST.BytesReader;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
-import org.apache.lucene.util.fst.Util.MinResult;
+import org.apache.lucene.util.fst.Util.Result;
+import org.apache.lucene.util.fst.Util.TopResults;
 import org.elasticsearch.common.collect.HppcMaps;
 
 import java.io.File;
@@ -419,14 +419,14 @@ public class XAnalyzingSuggester extends Lookup {
   @Override
   public void build(InputIterator iterator) throws IOException {
     String prefix = getClass().getSimpleName();
-    File directory = Sort.defaultTempDir();
+    File directory = OfflineSorter.defaultTempDir();
     File tempInput = File.createTempFile(prefix, ".input", directory);
     File tempSorted = File.createTempFile(prefix, ".sorted", directory);
 
     hasPayloads = iterator.hasPayloads();
 
-    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
-    Sort.ByteSequencesReader reader = null;
+    OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
+    OfflineSorter.ByteSequencesReader reader = null;
     BytesRef scratch = new BytesRef();
 
     TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
@@ -502,12 +502,12 @@ public class XAnalyzingSuggester extends Lookup {
       writer.close();
 
       // Sort all input/output pairs (required by FST.Builder):
-      new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
+      new OfflineSorter(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
 
       // Free disk space:
       tempInput.delete();
 
-      reader = new Sort.ByteSequencesReader(tempSorted);
+      reader = new OfflineSorter.ByteSequencesReader(tempSorted);
      
       PairOutputs<Long,BytesRef> outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
       Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
@@ -692,7 +692,7 @@ public class XAnalyzingSuggester extends Lookup {
   }
 
   @Override
-  public List<LookupResult> lookup(final CharSequence key, boolean onlyMorePopular, int num) {
+  public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
     assert num > 0;
 
     if (onlyMorePopular) {
@@ -763,7 +763,7 @@ public class XAnalyzingSuggester extends Lookup {
           }
         }
 
-        MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
+        Util.TopResults<Pair<Long,BytesRef>> completions = searcher.search();
 
         // NOTE: this is rather inefficient: we enumerate
         // every matching "exactly the same analyzed form"
@@ -777,7 +777,7 @@ public class XAnalyzingSuggester extends Lookup {
         // seach: it's bounded by how many prefix start
         // nodes we have and the
         // maxSurfaceFormsPerAnalyzedForm:
-        for(MinResult<Pair<Long,BytesRef>> completion : completions) {
+        for(Result<Pair<Long,BytesRef>> completion : completions) {
           BytesRef output2 = completion.output.output2;
           if (sameSurfaceForm(utf8Key, output2)) {
             results.add(getLookupResult(completion.output.output1, output2, spare));
@@ -832,9 +832,9 @@ public class XAnalyzingSuggester extends Lookup {
         searcher.addStartPaths(path.fstNode, path.output, true, path.input);
       }
 
-      MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
+      TopResults<Pair<Long,BytesRef>> completions = searcher.search();
 
-      for(MinResult<Pair<Long,BytesRef>> completion : completions) {
+      for(Result<Pair<Long,BytesRef>> completion : completions) {
 
         LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare);
 

+ 2 - 2
src/main/java/org/elasticsearch/Version.java

@@ -177,9 +177,9 @@ public class Version implements Serializable {
     public static final int V_1_1_2_ID = /*00*/1010299;
     public static final Version V_1_1_2 = new Version(V_1_1_2_ID, false, org.apache.lucene.util.Version.LUCENE_47);
     public static final int V_1_2_0_ID = /*00*/1020099;
-    public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_47);
+    public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_48);
     public static final int V_2_0_0_ID = /*00*/2000099;
-    public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_47);
+    public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_48);
 
     public static final Version CURRENT = V_2_0_0;
 

+ 27 - 49
src/main/java/org/elasticsearch/common/io/FileSystemUtils.java

@@ -19,14 +19,12 @@
 
 package org.elasticsearch.common.io;
 
-import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.ThreadInterruptedException;
-import org.elasticsearch.Version;
+import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.common.logging.ESLogger;
 
-import java.io.*;
-import java.nio.channels.FileChannel;
-import java.nio.file.StandardOpenOption;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
 
 /**
  *
@@ -79,6 +77,7 @@ public class FileSystemUtils {
      * the given root files will be deleted as well. Otherwise only their content is deleted.
      */
     public static boolean deleteRecursively(File[] roots, boolean deleteRoots) {
+
         boolean deleted = true;
         for (File root : roots) {
             deleted &= deleteRecursively(root, deleteRoots);
@@ -86,6 +85,27 @@ public class FileSystemUtils {
         return deleted;
     }
 
+    /**
+     * Deletes all subdirectories of the given roots recursively.
+     */
+    public static boolean deleteSubDirectories(File[] roots) {
+
+        boolean deleted = true;
+        for (File root : roots) {
+            if (root.isDirectory()) {
+                File[] files = root.listFiles(new FileFilter() {
+                    @Override
+                    public boolean accept(File pathname) {
+                        return pathname.isDirectory();
+                    }
+                });
+                deleted &= deleteRecursively(files, true);
+            }
+
+        }
+        return deleted;
+    }
+
     /**
      * Deletes the given files recursively including the given roots.
      */
@@ -122,10 +142,6 @@ public class FileSystemUtils {
         return false;
     }
 
-    static {
-        assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_47 : "Use IOUtils#fsync instead of syncFile in Lucene 4.8";
-    }
-
     /**
      * Ensure that any writes to the given file is written to the storage device that contains it.
      * @param fileToSync the file to fsync
@@ -133,45 +149,7 @@ public class FileSystemUtils {
      *  because not all file systems and operating systems allow to fsync on a directory)
      */
     public static void syncFile(File fileToSync, boolean isDir) throws IOException {
-        IOException exc = null;
-
-        // If the file is a directory we have to open read-only, for regular files we must open r/w for the fsync to have an effect.
-        // See http://blog.httrack.com/blog/2013/11/15/everything-you-always-wanted-to-know-about-fsync/
-        try (final FileChannel file = FileChannel.open(fileToSync.toPath(), isDir ? StandardOpenOption.READ : StandardOpenOption.WRITE)) {
-            for (int retry = 0; retry < 5; retry++) {
-                try {
-                    file.force(true);
-                    return;
-                } catch (IOException ioe) {
-                    if (exc == null) {
-                        exc = ioe;
-                    }
-                    try {
-                        // Pause 5 msec
-                        Thread.sleep(5L);
-                    } catch (InterruptedException ie) {
-                        ThreadInterruptedException ex = new ThreadInterruptedException(ie);
-                        ex.addSuppressed(exc);
-                        throw ex;
-                    }
-                }
-            }
-        } catch (IOException ioe) {
-            if (exc == null) {
-                exc = ioe;
-            }
-        }
-
-        if (isDir) {
-            assert (Constants.LINUX || Constants.MAC_OS_X) == false :
-                    "On Linux and MacOSX fsyncing a directory should not throw IOException, "+
-                            "we just don't want to rely on that in production (undocumented). Got: " + exc;
-            // Ignore exception if it is a directory
-            return;
-        }
-
-        // Throw original exception
-        throw exc;
+        IOUtils.fsync(fileToSync, isDir);
     }
 
     /**
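The hand-rolled retry loop disappears because Lucene 4.8's IOUtils provides an equivalent fsync helper; syncFile now just delegates. Usage is a single call per file (the paths here are hypothetical):

import org.apache.lucene.util.IOUtils;

import java.io.File;
import java.io.IOException;

class FsyncExample {
    static void flushDurably(File file, File parentDir) throws IOException {
        IOUtils.fsync(file, false);     // fsync a regular file
        IOUtils.fsync(parentDir, true); // best-effort fsync of its directory
    }
}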

+ 2 - 1
src/main/java/org/elasticsearch/common/lucene/Directories.java

@@ -23,6 +23,7 @@ import org.apache.lucene.store.Directory;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.nio.file.NoSuchFileException;
 
 /**
  * A set of utilities for Lucene {@link Directory}.
@@ -40,7 +41,7 @@ public class Directories {
         for (String file : files) {
             try {
                 estimatedSize += directory.fileLength(file);
-            } catch (FileNotFoundException e) {
+            } catch (NoSuchFileException | FileNotFoundException e) {
                 // ignore, the file is not there no more
             }
         }

+ 4 - 1
src/main/java/org/elasticsearch/common/lucene/Lucene.java

@@ -45,7 +45,7 @@ import java.io.IOException;
  */
 public class Lucene {
 
-    public static final Version VERSION = Version.LUCENE_47;
+    public static final Version VERSION = Version.LUCENE_48;
     public static final Version ANALYZER_VERSION = VERSION;
     public static final Version QUERYPARSER_VERSION = VERSION;
 
@@ -61,6 +61,9 @@ public class Lucene {
         if (version == null) {
             return defaultVersion;
         }
+        if ("4.8".equals(version)) {
+            return VERSION.LUCENE_48;
+        }
         if ("4.7".equals(version)) {
             return VERSION.LUCENE_47;
         }

+ 1 - 24
src/main/java/org/elasticsearch/common/lucene/SegmentReaderUtils.java

@@ -21,30 +21,11 @@ package org.elasticsearch.common.lucene;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.FilterAtomicReader;
 import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.util.Version;
 import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.common.Nullable;
 
-import java.lang.reflect.Field;
-
 public class SegmentReaderUtils {
 
-    private static final Field FILTER_ATOMIC_READER_IN;
-
-    static {
-        assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "Lucene 4.8 has FilterAtomicReader.unwrap";
-
-        Field in = null;
-        try { // and another one bites the dust...
-            in = FilterAtomicReader.class.getDeclaredField("in");
-            in.setAccessible(true);
-        } catch (NoSuchFieldException e) {
-            assert false : "Failed to get field: " + e.getMessage();
-        }
-        FILTER_ATOMIC_READER_IN = in;
-
-    }
-
     /**
      * Tries to extract a segment reader from the given index reader.
      * If no SegmentReader can be extracted an {@link org.elasticsearch.ElasticsearchIllegalStateException} is thrown.
@@ -80,11 +61,7 @@ public class SegmentReaderUtils {
             return (SegmentReader) reader;
         } else if (reader instanceof FilterAtomicReader) {
             final FilterAtomicReader fReader = (FilterAtomicReader) reader;
-            try {
-                return FILTER_ATOMIC_READER_IN == null ? null :
-                        segmentReader((AtomicReader) FILTER_ATOMIC_READER_IN.get(fReader));
-            } catch (IllegalAccessException e) {
-            }
+            return segmentReader(FilterAtomicReader.unwrap(fReader));
         }
         if (fail) {
             // hard fail - we can't get a SegmentReader
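FilterAtomicReader.unwrap, new in Lucene 4.8, replaces the reflective poke at the private "in" field. A sketch of the unwrap-then-check pattern used above:

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.SegmentReader;

class SegmentReaderExample {
    // Peel any FilterAtomicReader wrapping and test what is underneath.
    static SegmentReader segmentReaderOrNull(AtomicReader reader) {
        AtomicReader unwrapped = FilterAtomicReader.unwrap(reader);
        return unwrapped instanceof SegmentReader ? (SegmentReader) unwrapped : null;
    }
}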

+ 2 - 3
src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java

@@ -62,8 +62,7 @@ public class AllTermQuery extends SpanTermQuery {
         }
 
         @Override
-        public AllTermSpanScorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
-                boolean topScorer, Bits acceptDocs) throws IOException {
+        public AllTermSpanScorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             if (this.stats == null) {
                 return null;
             }
@@ -147,7 +146,7 @@ public class AllTermQuery extends SpanTermQuery {
         
         @Override
         public Explanation explain(AtomicReaderContext context, int doc) throws IOException{
-            AllTermSpanScorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
+            AllTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
             if (scorer != null) {
               int newDoc = scorer.advance(doc);
               if (newDoc == doc) {

+ 1 - 1
src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java

@@ -57,7 +57,7 @@ public final class MatchNoDocsQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             return null;
         }
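This is the mechanical part of the upgrade: in Lucene 4.8, Weight.scorer() dropped the scoreDocsInOrder/topScorer flags (bulk scoring now has its own entry point with a default implementation). A complete minimal Weight against the new signature, mirroring the match-nothing case above:

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;

import java.io.IOException;

final class NoDocsWeight extends Weight {
    private final Query query;

    NoDocsWeight(Query query) {
        this.query = query;
    }

    @Override
    public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
        return new ComplexExplanation(false, 0f, "no matching documents");
    }

    @Override
    public Query getQuery() {
        return query;
    }

    @Override
    public float getValueForNormalization() throws IOException {
        return 0f;
    }

    @Override
    public void normalize(float norm, float topLevelBoost) {
        // nothing to normalize
    }

    @Override
    public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        return null; // a null scorer means no documents match in this segment
    }
}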
 

+ 5 - 5
src/main/java/org/elasticsearch/common/lucene/search/XFilteredQuery.java

@@ -211,14 +211,14 @@ public final class XFilteredQuery extends Query {
         }
 
         @Override
-        public Scorer filteredScorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Weight weight, DocIdSet docIdSet) throws IOException {
+        public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException {
             // CHANGE: If threshold is 0, always pass down the accept docs, don't pay the price of calling nextDoc even...
             if (threshold == 0) {
                 final Bits filterAcceptDocs = docIdSet.bits();
                 if (filterAcceptDocs != null) {
-                    return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
+                    return weight.scorer(context, filterAcceptDocs);
                 } else {
-                    return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
+                    return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet);
                 }
             }
 
@@ -226,11 +226,11 @@ public final class XFilteredQuery extends Query {
             if (threshold == -1) {
                 // default  value, don't iterate on only apply filter after query if its not a "fast" docIdSet
                 if (!DocIdSets.isFastIterator(docIdSet)) {
-                    return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
+                    return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, weight, docIdSet);
                 }
             }
 
-            return super.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
+            return super.filteredScorer(context, weight, docIdSet);
         }
 
         /**

+ 2 - 2
src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java

@@ -150,11 +150,11 @@ public class FiltersFunctionScoreQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             // we ignore scoreDocsInOrder parameter, because we need to score in
             // order if documents are scored with a script. The
             // ShardLookup depends on in order scoring.
-            Scorer subQueryScorer = subQueryWeight.scorer(context, true, false, acceptDocs);
+            Scorer subQueryScorer = subQueryWeight.scorer(context, acceptDocs);
             if (subQueryScorer == null) {
                 return null;
             }

+ 2 - 2
src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java

@@ -112,11 +112,11 @@ public class FunctionScoreQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             // we ignore scoreDocsInOrder parameter, because we need to score in
             // order if documents are scored with a script. The
             // ShardLookup depends on in order scoring.
-            Scorer subQueryScorer = subQueryWeight.scorer(context, true, false, acceptDocs);
+            Scorer subQueryScorer = subQueryWeight.scorer(context, acceptDocs);
             if (subQueryScorer == null) {
                 return null;
             }

+ 0 - 97
src/main/java/org/elasticsearch/common/lucene/store/ChecksumIndexOutput.java

@@ -1,97 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.common.lucene.store;
-
-import org.apache.lucene.store.IndexOutput;
-
-import java.io.IOException;
-import java.util.zip.Checksum;
-
-/**
- */
-public class ChecksumIndexOutput extends IndexOutput {
-
-    private final IndexOutput out;
-
-    private final Checksum digest;
-
-    public ChecksumIndexOutput(IndexOutput out, Checksum digest) {
-        this.out = out;
-        this.digest = digest;
-    }
-
-    public Checksum digest() {
-        return digest;
-    }
-
-    @Override
-    public void writeByte(byte b) throws IOException {
-        out.writeByte(b);
-        digest.update(b);
-    }
-
-    @Override
-    public void setLength(long length) throws IOException {
-        out.setLength(length);
-    }
-
-    // don't override copyBytes, since we need to read it and compute it
-//    @Override
-//    public void copyBytes(DataInput input, long numBytes) throws IOException {
-//        super.copyBytes(input, numBytes);
-//    }
-
-
-    @Override
-    public String toString() {
-        return out.toString();
-    }
-
-    @Override
-    public void writeBytes(byte[] b, int offset, int length) throws IOException {
-        out.writeBytes(b, offset, length);
-        digest.update(b, offset, length);
-    }
-
-    @Override
-    public void flush() throws IOException {
-        out.flush();
-    }
-
-    @Override
-    public void close() throws IOException {
-        out.close();
-    }
-
-    @Override
-    public long getFilePointer() {
-        return out.getFilePointer();
-    }
-
-    @Override
-    public void seek(long pos) throws IOException {
-        out.seek(pos);
-    }
-
-    @Override
-    public long length() throws IOException {
-        return out.length();
-    }
-}
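The wrapper can go because, as of Lucene 4.8, every IndexOutput maintains a running CRC32 itself; this is the same end-to-end checksum work that adds writeFooter/checkFooter in BloomFilterPostingsFormat below. A sketch, assuming the getChecksum() accessor and a hypothetical directory path:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

import java.io.File;

public class ChecksumExample {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(new File("/tmp/checksum-example"))) {
            IndexOutput out = dir.createOutput("example.bin", IOContext.DEFAULT);
            out.writeString("hello");
            // Assumption: 4.8 exposes the running CRC32 of everything written so far.
            System.out.println("crc32=" + Long.toHexString(out.getChecksum()));
            out.close();
        }
    }
}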

+ 55 - 5
src/main/java/org/elasticsearch/common/util/CollectionUtils.java

@@ -23,13 +23,10 @@ import com.carrotsearch.hppc.DoubleArrayList;
 import com.carrotsearch.hppc.FloatArrayList;
 import com.carrotsearch.hppc.LongArrayList;
 import com.carrotsearch.hppc.ObjectArrayList;
-import org.apache.lucene.util.IntroSorter;
+import org.apache.lucene.util.*;
 import org.elasticsearch.common.Preconditions;
 
-import java.util.AbstractList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.RandomAccess;
+import java.util.*;
 
 /** Collections-related utility methods. */
 public enum CollectionUtils {
@@ -307,5 +304,58 @@ public enum CollectionUtils {
         }
 
     };
+    public static void sort(final BytesRefArray bytes, final int[] indices) {
+        sort(new BytesRef(), new BytesRef(), bytes, indices);
+    }
+
+    private static void sort(final BytesRef scratch, final BytesRef scratch1, final BytesRefArray bytes, final int[] indices) {
+
+        final int numValues = bytes.size();
+        assert indices.length >= numValues;
+        if (numValues > 1) {
+            new InPlaceMergeSorter() {
+                final Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+                @Override
+                protected int compare(int i, int j) {
+                    return comparator.compare(bytes.get(scratch, indices[i]), bytes.get(scratch1, indices[j]));
+                }
+
+                @Override
+                protected void swap(int i, int j) {
+                    int value_i = indices[i];
+                    indices[i] = indices[j];
+                    indices[j] = value_i;
+                }
+            }.sort(0, numValues);
+        }
+
+    }
+
+    public static int sortAndDedup(final BytesRefArray bytes, final int[] indices) {
+        final BytesRef scratch = new BytesRef();
+        final BytesRef scratch1 = new BytesRef();
+        final int numValues = bytes.size();
+        assert indices.length >= numValues;
+        if (numValues <= 1) {
+            return numValues;
+        }
+        sort(scratch, scratch1, bytes, indices);
+        int uniqueCount = 1;
+        BytesRef previous = scratch;
+        BytesRef current = scratch1;
+        bytes.get(previous, indices[0]);
+        for (int i = 1; i < numValues; ++i) {
+            bytes.get(current, indices[i]);
+            if (!previous.equals(current)) {
+                indices[uniqueCount++] = indices[i];
+            }
+            BytesRef tmp = previous;
+            previous = current;
+            current = tmp;
+        }
+        return uniqueCount;
+
+    }
+
 
 }
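A hypothetical usage of the two helpers just added: fill a BytesRefArray, sort an index array over it, and compact duplicates, all without moving the underlying bytes.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefArray;
import org.apache.lucene.util.Counter;
import org.elasticsearch.common.util.CollectionUtils;

public class SortAndDedupExample {
    public static void main(String[] args) {
        BytesRefArray bytes = new BytesRefArray(Counter.newCounter());
        for (String s : new String[] {"b", "a", "b", "c"}) {
            bytes.append(new BytesRef(s));
        }
        int[] indices = new int[bytes.size()];
        for (int i = 0; i < indices.length; i++) {
            indices[i] = i;
        }
        int unique = CollectionUtils.sortAndDedup(bytes, indices);
        BytesRef scratch = new BytesRef();
        for (int i = 0; i < unique; i++) {
            // prints a, b, c — only the index array was rearranged
            System.out.println(bytes.get(scratch, indices[i]).utf8ToString());
        }
    }
}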

+ 24 - 1
src/main/java/org/elasticsearch/env/NodeEnvironment.java

@@ -38,6 +38,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 /**
  *
@@ -50,6 +51,7 @@ public class NodeEnvironment extends AbstractComponent {
     private final Lock[] locks;
 
     private final int localNodeId;
+    private final AtomicBoolean closed = new AtomicBoolean(false);
 
     @Inject
     public NodeEnvironment(Settings settings, Environment environment) {
@@ -143,6 +145,7 @@ public class NodeEnvironment extends AbstractComponent {
     }
 
     public File[] nodeDataLocations() {
+        assert assertEnvIsLocked();
         if (nodeFiles == null || locks == null) {
             throw new ElasticsearchIllegalStateException("node is not configured to store local location");
         }
@@ -150,10 +153,12 @@ public class NodeEnvironment extends AbstractComponent {
     }
 
     public File[] indicesLocations() {
+        assert assertEnvIsLocked();
         return nodeIndicesLocations;
     }
 
     public File[] indexLocations(Index index) {
+        assert assertEnvIsLocked();
         File[] indexLocations = new File[nodeFiles.length];
         for (int i = 0; i < nodeFiles.length; i++) {
             indexLocations[i] = new File(new File(nodeFiles[i], "indices"), index.name());
@@ -162,6 +167,7 @@ public class NodeEnvironment extends AbstractComponent {
     }
 
     public File[] shardLocations(ShardId shardId) {
+        assert assertEnvIsLocked();
         File[] shardLocations = new File[nodeFiles.length];
         for (int i = 0; i < nodeFiles.length; i++) {
             shardLocations[i] = new File(new File(new File(nodeFiles[i], "indices"), shardId.index().name()), Integer.toString(shardId.id()));
@@ -173,6 +179,7 @@ public class NodeEnvironment extends AbstractComponent {
         if (nodeFiles == null || locks == null) {
             throw new ElasticsearchIllegalStateException("node is not configured to store local location");
         }
+        assert assertEnvIsLocked();
         Set<String> indices = Sets.newHashSet();
         for (File indicesLocation : nodeIndicesLocations) {
             File[] indicesList = indicesLocation.listFiles();
@@ -192,6 +199,7 @@ public class NodeEnvironment extends AbstractComponent {
         if (nodeFiles == null || locks == null) {
             throw new ElasticsearchIllegalStateException("node is not configured to store local location");
         }
+        assert assertEnvIsLocked();
         Set<ShardId> shardIds = Sets.newHashSet();
         for (File indicesLocation : nodeIndicesLocations) {
             File[] indicesList = indicesLocation.listFiles();
@@ -222,7 +230,7 @@ public class NodeEnvironment extends AbstractComponent {
     }
 
     public void close() {
-        if (locks != null) {
+        if (closed.compareAndSet(false, true) && locks != null) {
             for (Lock lock : locks) {
                 try {
                     logger.trace("releasing lock [{}]", lock);
@@ -233,4 +241,19 @@ public class NodeEnvironment extends AbstractComponent {
             }
         }
     }
+
+
+    private boolean assertEnvIsLocked() {
+        if (!closed.get() && locks != null) {
+            for (Lock lock : locks) {
+                try {
+                    assert lock.isLocked() : "Lock: " + lock + "is not locked";
+                } catch (IOException e) {
+                    logger.warn("lock assertion failed", e);
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
 }
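assertEnvIsLocked() returns boolean purely so it can be invoked through the assert keyword: with -ea the lock check runs (and a broken lock trips the assertion), without it the JVM elides the call entirely, so production pays nothing. The idiom in isolation, with hypothetical names:

class AssertIdiom {
    private boolean expensiveInvariantHolds() {
        // costly verification, e.g. probing file locks as above
        return true;
    }

    void doWork() {
        assert expensiveInvariantHolds(); // runs only when assertions are enabled
        // ... actual work ...
    }
}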

+ 10 - 14
src/main/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactory.java

@@ -18,10 +18,8 @@
  */
 package org.elasticsearch.index.analysis;
 
-import java.util.Locale;
-
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.hunspell.HunspellDictionary;
+import org.apache.lucene.analysis.hunspell.Dictionary;
 import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.inject.Inject;
@@ -31,12 +29,14 @@ import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
 import org.elasticsearch.indices.analysis.HunspellService;
 
+import java.util.Locale;
+
 @AnalysisSettingsRequired
 public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
 
-    private final HunspellDictionary dictionary;
+    private final Dictionary dictionary;
     private final boolean dedup;
-    private final int recursionLevel;
+    private final boolean longestOnly;
 
     @Inject
     public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
@@ -53,24 +53,20 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
         }
 
         dedup = settings.getAsBoolean("dedup", true);
-
-        recursionLevel = settings.getAsInt("recursion_level", 2);
-        if (recursionLevel < 0) {
-            throw new ElasticsearchIllegalArgumentException(String.format(Locale.ROOT, "Negative recursion level not allowed for hunspell [%d]", recursionLevel));
-        }
+        longestOnly = settings.getAsBoolean("longest_only", false);
     }
 
     @Override
     public TokenStream create(TokenStream tokenStream) {
-        return new HunspellStemFilter(tokenStream, dictionary, dedup, recursionLevel);
+        return new HunspellStemFilter(tokenStream, dictionary, dedup, longestOnly);
     }
 
     public boolean dedup() {
         return dedup;
     }
-
-    public int recursionLevel() {
-        return recursionLevel;
+    
+    public boolean longestOnly() {
+        return longestOnly;
     }
 
 }

+ 13 - 4
src/main/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactory.java

@@ -20,9 +20,11 @@
 package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.Lucene47WordDelimiterFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.inject.assistedinject.Assisted;
 import org.elasticsearch.common.lucene.Lucene;
@@ -86,10 +88,17 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
 
     @Override
     public TokenStream create(TokenStream tokenStream) {
-        return new WordDelimiterFilter(tokenStream,
-                charTypeTable,
-                flags,
-                protoWords);
+         if (version.onOrAfter(Version.LUCENE_48)) {
+             return new WordDelimiterFilter(version, tokenStream,
+                     charTypeTable,
+                     flags,
+                     protoWords);
+         } else {
+             return new Lucene47WordDelimiterFilter(tokenStream,
+                     charTypeTable,
+                     flags,
+                     protoWords);
+         }
     }
 
     public int getFlag(int flag, Settings settings, String key, boolean defaultValue) {
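Lucene 4.8 changed WordDelimiterFilter's behavior and kept the old semantics available as Lucene47WordDelimiterFilter, hence the version gate: indices created on earlier versions keep analyzing exactly as before. The gate in isolation, as a sketch; the default type table and null protected-words set are illustrative:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.Lucene47WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.apache.lucene.util.Version;

class WordDelimiterGate {
    // flags would be a combination such as WordDelimiterFilter.GENERATE_WORD_PARTS
    static TokenStream wrap(Version version, TokenStream in, int flags) {
        byte[] table = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
        if (version.onOrAfter(Version.LUCENE_48)) {
            return new WordDelimiterFilter(version, in, table, flags, null);
        }
        // pre-4.8 indices keep the old token/offset behavior
        return new Lucene47WordDelimiterFilter(in, table, flags, null);
    }
}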

+ 19 - 6
src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java

@@ -21,8 +21,8 @@ package org.elasticsearch.index.codec.postingsformat;
 
 import org.apache.lucene.codecs.*;
 import org.apache.lucene.index.*;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -51,6 +51,8 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
 
     public static final String BLOOM_CODEC_NAME = "XBloomFilter"; // the Lucene one is named BloomFilter
     public static final int BLOOM_CODEC_VERSION = 1;
+    public static final int BLOOM_CODEC_VERSION_CHECKSUM = 2;
+    public static final int BLOOM_CODEC_VERSION_CURRENT = BLOOM_CODEC_VERSION_CHECKSUM;
 
     /**
      * Extension of Bloom Filters file
@@ -116,12 +118,12 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
 
             String bloomFileName = IndexFileNames.segmentFileName(
                     state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
-            IndexInput bloomIn = null;
+            ChecksumIndexInput bloomIn = null;
             boolean success = false;
             try {
-                bloomIn = state.directory.openInput(bloomFileName, state.context);
-                CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
-                        BLOOM_CODEC_VERSION);
+                bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
+                int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
+                        BLOOM_CODEC_VERSION_CURRENT);
                 // // Load the hash function used in the BloomFilter
                 // hashFunction = HashFunction.forName(bloomIn.readString());
                 // Load the delegate postings format
@@ -146,6 +148,11 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
                         FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
                         bloomsByFieldName.put(fieldInfo.name, bloom);
                     }
+                    if (version >= BLOOM_CODEC_VERSION_CHECKSUM) {
+                        CodecUtil.checkFooter(bloomIn);
+                    } else {
+                        CodecUtil.checkEOF(bloomIn);
+                    }
                 }
                 IOUtils.close(bloomIn);
                 success = true;
@@ -197,6 +204,11 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
             }
             return size;
         }
+
+        @Override
+        public void checkIntegrity() throws IOException {
+            delegateFieldsProducer.checkIntegrity();
+        }
     }
 
     public static final class BloomFilteredTerms extends FilterAtomicReader.FilterTerms {
@@ -382,7 +394,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
                 bloomOutput = state.directory
                         .createOutput(bloomFileName, state.context);
                 CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
-                        BLOOM_CODEC_VERSION);
+                        BLOOM_CODEC_VERSION_CURRENT);
                 // remember the name of the postings format we will delegate to
                 bloomOutput.writeString(delegatePostingsFormat.getName());
 
@@ -394,6 +406,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
                     bloomOutput.writeInt(fieldInfo.number);
                     saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
                 }
+                CodecUtil.writeFooter(bloomOutput);
             } finally {
                 IOUtils.close(bloomOutput);
             }
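The bump from version 1 to 2 buys the new 4.8 checksum machinery: write a footer on the way out, and on the way in verify either the footer (new files) or clean EOF (old files), keyed off the header version. The pattern in isolation, with hypothetical codec and file names:

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

import java.io.File;

public class FooterExample {
    static final String CODEC = "ExampleCodec";
    static final int VERSION_START = 1;
    static final int VERSION_CHECKSUM = 2;
    static final int VERSION_CURRENT = VERSION_CHECKSUM;

    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(new File("/tmp/footer-example"))) {
            IndexOutput out = dir.createOutput("example.dat", IOContext.DEFAULT);
            CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT);
            out.writeLong(42L);
            CodecUtil.writeFooter(out); // trailing CRC32
            out.close();

            ChecksumIndexInput in = dir.openChecksumInput("example.dat", IOContext.DEFAULT);
            int version = CodecUtil.checkHeader(in, CODEC, VERSION_START, VERSION_CURRENT);
            System.out.println("value=" + in.readLong());
            if (version >= VERSION_CHECKSUM) {
                CodecUtil.checkFooter(in);  // verifies the CRC32
            } else {
                CodecUtil.checkEOF(in);     // legacy files: just no trailing bytes
            }
            in.close();
        }
    }
}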

+ 4 - 5
src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java

@@ -59,7 +59,6 @@ import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
 import org.elasticsearch.index.engine.*;
 import org.elasticsearch.index.indexing.ShardIndexingService;
 import org.elasticsearch.index.mapper.Uid;
-import org.elasticsearch.index.merge.Merges;
 import org.elasticsearch.index.merge.OnGoingMerge;
 import org.elasticsearch.index.merge.policy.ElasticsearchMergePolicy;
 import org.elasticsearch.index.merge.policy.MergePolicyProvider;
@@ -883,7 +882,7 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
         }
         possibleMergeNeeded = false;
         try (InternalLock _ = readLock.acquire()) {
-            Merges.maybeMerge(currentIndexWriter());
+            currentIndexWriter().maybeMerge();
         } catch (Throwable t) {
             maybeFailEngine(t);
             throw new OptimizeFailedEngineException(shardId, t);
@@ -918,12 +917,12 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
                     elasticsearchMergePolicy.setForce(true);
                 }
                 if (optimize.onlyExpungeDeletes()) {
-                    Merges.forceMergeDeletes(writer, false);
+                    writer.forceMergeDeletes(false);
                 } else if (optimize.maxNumSegments() <= 0) {
-                    Merges.maybeMerge(writer);
+                    writer.maybeMerge();
                     possibleMergeNeeded = false;
                 } else {
-                    Merges.forceMerge(writer, optimize.maxNumSegments(), false);
+                    writer.forceMerge(optimize.maxNumSegments(), false);
                 }
             } catch (Throwable t) {
                 maybeFailEngine(t);

+ 2 - 1
src/main/java/org/elasticsearch/index/fielddata/plain/BinaryDVAtomicFieldData.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
 
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchIllegalStateException;
@@ -69,7 +70,7 @@ public class BinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocValues.
             final BinaryDocValues v = reader.getBinaryDocValues(field);
             if (v == null) {
                 // segment has no value
-                values = BinaryDocValues.EMPTY;
+                values = DocValues.EMPTY_BINARY;
                 docsWithField = new Bits.MatchNoBits(reader.maxDoc());
             } else {
                 values = v;

+ 2 - 1
src/main/java/org/elasticsearch/index/fielddata/plain/BinaryDVNumericAtomicFieldData.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
 
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -39,7 +40,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
     BinaryDVNumericAtomicFieldData(AtomicReader reader, BinaryDocValues values, NumericType numericType) {
         super(numericType.isFloatingPoint());
         this.reader = reader;
-        this.values = values == null ? BinaryDocValues.EMPTY : values;
+        this.values = values == null ? DocValues.EMPTY_BINARY : values;
         this.numericType = numericType;
     }
 

+ 2 - 1
src/main/java/org/elasticsearch/index/fielddata/plain/BytesBinaryDVAtomicFieldData.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
 
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.index.fielddata.AtomicFieldData;
@@ -35,7 +36,7 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocVal
     BytesBinaryDVAtomicFieldData(AtomicReader reader, BinaryDocValues values) {
         super();
         this.reader = reader;
-        this.values = values == null ? BinaryDocValues.EMPTY : values;
+        this.values = values == null ? DocValues.EMPTY_BINARY : values;
     }
 
     @Override

+ 2 - 1
src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVAtomicFieldData.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.fielddata.plain;
 
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.geo.GeoPoint;
 import org.elasticsearch.common.util.ByteUtils;
@@ -33,7 +34,7 @@ final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData<Scri
 
     GeoPointBinaryDVAtomicFieldData(BinaryDocValues values) {
         super();
-        this.values = values == null ? BinaryDocValues.EMPTY : values;
+        this.values = values == null ? DocValues.EMPTY_BINARY : values;
     }
 
     @Override

+ 2 - 1
src/main/java/org/elasticsearch/index/fielddata/plain/NumericDVAtomicFieldData.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.fielddata.plain;
 
 import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.util.Bits;
 import org.elasticsearch.ElasticsearchIllegalStateException;
@@ -82,7 +83,7 @@ public class NumericDVAtomicFieldData extends AbstractAtomicNumericFieldData {
             final NumericDocValues v = reader.getNumericDocValues(field);
             if (v == null) {
                 // segment has no value
-                values = NumericDocValues.EMPTY;
+                values = DocValues.EMPTY_NUMERIC;
                 docsWithField = new Bits.MatchNoBits(reader.maxDoc());
             } else {
                 values = v;

+ 2 - 4
src/main/java/org/elasticsearch/index/fielddata/plain/SortedSetDVAtomicFieldData.java

@@ -19,9 +19,7 @@
 
 package org.elasticsearch.index.fielddata.plain;
 
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchIllegalStateException;
@@ -104,7 +102,7 @@ abstract class SortedSetDVAtomicFieldData {
             if (values == null) {
                 // This field has not been populated
                 assert reader.getFieldInfos().fieldInfo(field) == null;
-                values = SortedSetDocValues.EMPTY;
+                values = DocValues.EMPTY_SORTED_SET;
             }
             return values;
         } catch (IOException e) {

+ 5 - 26
src/main/java/org/elasticsearch/index/merge/EnableMergeScheduler.java

@@ -21,6 +21,7 @@ package org.elasticsearch.index.merge;
 
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.MergeTrigger;
 
 import java.io.IOException;
 
@@ -31,44 +32,22 @@ import java.io.IOException;
  * <p/>
  * This merge scheduler works around the fact that even though a merge
  * policy can prevent new merges from being created as a result of a segment flush
 - * (during an indexing operation, for example), the {@link #merge(org.apache.lucene.index.IndexWriter)}
 + * (during an indexing operation, for example), the {@link #merge(org.apache.lucene.index.IndexWriter, org.apache.lucene.index.MergeTrigger, boolean)}
  * method will still be invoked, and can stall indexing.
  */
 public class EnableMergeScheduler extends MergeScheduler {
 
     private final MergeScheduler mergeScheduler;
 
-    private final ThreadLocal<Boolean> enabled = new ThreadLocal<Boolean>() {
-        @Override
-        protected Boolean initialValue() {
-            return Boolean.FALSE;
-        }
-    };
-
     public EnableMergeScheduler(MergeScheduler mergeScheduler) {
         this.mergeScheduler = mergeScheduler;
     }
 
-    /**
-     * Enable merges on the current thread.
-     */
-    void enableMerge() {
-        assert !enabled.get();
-        enabled.set(Boolean.TRUE);
-    }
-
-    /**
-     * Disable merges on the current thread.
-     */
-    void disableMerge() {
-        assert enabled.get();
-        enabled.set(Boolean.FALSE);
-    }
 
     @Override
-    public void merge(IndexWriter writer) throws IOException {
-        if (enabled.get()) {
-            mergeScheduler.merge(writer);
+    public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException {
+        if (trigger == MergeTrigger.EXPLICIT) {
+            mergeScheduler.merge(writer, trigger, newMergesFound);
         }
     }
 

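The per-thread enable/disable flag is gone because Lucene 4.8 passes a MergeTrigger to MergeScheduler.merge: the wrapper simply forwards EXPLICIT merges (those started via maybeMerge/forceMerge on the writer) and drops flush-triggered ones. A wiring sketch, with the IndexWriterConfig left as an assumption:

import org.apache.lucene.index.SerialMergeScheduler;
import org.elasticsearch.index.merge.EnableMergeScheduler;

// Only MergeTrigger.EXPLICIT reaches the delegate; segment-flush merges
// are dropped so they cannot stall indexing.
EnableMergeScheduler gated = new EnableMergeScheduler(new SerialMergeScheduler());
// iwc.setMergeScheduler(gated); // 'iwc' is a hypothetical IndexWriterConfig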
+ 0 - 107
src/main/java/org/elasticsearch/index/merge/Merges.java

@@ -1,107 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.merge;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.MergeScheduler;
-
-import java.io.IOException;
-
-/**
- * A helper to execute explicit merges of the {@link org.apache.lucene.index.IndexWriter} APIs. It
- * holds additional logic which in case the merge scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}
- * then merges are explicitly enabled and disabled back at the end.
- * <p/>
- * In our codebase, at least until we can somehow use this logic in Lucene IW itself, we should only use
- * this class to execute explicit merges. The explicit merge calls have been added to the forbidden APIs
- * list to make sure we don't call them unless we use this class.
- */
-public class Merges {
-
-    /**
-     * See {@link org.apache.lucene.index.IndexWriter#maybeMerge()}, with the additional
-     * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
-     */
-    public static void maybeMerge(IndexWriter writer) throws IOException {
-        MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
-        if (mergeScheduler instanceof EnableMergeScheduler) {
-            ((EnableMergeScheduler) mergeScheduler).enableMerge();
-            try {
-                writer.maybeMerge();
-            } finally {
-                ((EnableMergeScheduler) mergeScheduler).disableMerge();
-            }
-        } else {
-            writer.maybeMerge();
-        }
-    }
-
-    /**
-     * See {@link org.apache.lucene.index.IndexWriter#forceMerge(int)}, with the additional
-     * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
-     */
-    public static void forceMerge(IndexWriter writer, int maxNumSegments) throws IOException {
-        forceMerge(writer, maxNumSegments, true);
-    }
-
-    /**
-     * See {@link org.apache.lucene.index.IndexWriter#forceMerge(int, boolean)}, with the additional
-     * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
-     */
-    public static void forceMerge(IndexWriter writer, int maxNumSegments, boolean doWait) throws IOException {
-        MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
-        if (mergeScheduler instanceof EnableMergeScheduler) {
-            ((EnableMergeScheduler) mergeScheduler).enableMerge();
-            try {
-                writer.forceMerge(maxNumSegments, doWait);
-            } finally {
-                ((EnableMergeScheduler) mergeScheduler).disableMerge();
-            }
-        } else {
-            writer.forceMerge(maxNumSegments, doWait);
-        }
-    }
-
-    /**
-     * See {@link org.apache.lucene.index.IndexWriter#forceMergeDeletes()}, with the additional
-     * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
-     */
-    public static void forceMergeDeletes(IndexWriter writer) throws IOException {
-        forceMergeDeletes(writer, true);
-    }
-
-    /**
-     * See {@link org.apache.lucene.index.IndexWriter#forceMergeDeletes(boolean)}, with the additional
-     * logic of explicitly enabling merges if the scheduler is {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
-     */
-    public static void forceMergeDeletes(IndexWriter writer, boolean doWait) throws IOException {
-        MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
-        if (mergeScheduler instanceof EnableMergeScheduler) {
-            ((EnableMergeScheduler) mergeScheduler).enableMerge();
-            try {
-                writer.forceMergeDeletes(doWait);
-            } finally {
-                ((EnableMergeScheduler) mergeScheduler).disableMerge();
-            }
-        } else {
-            writer.forceMergeDeletes(doWait);
-        }
-    }
-}

+ 2 - 2
src/main/java/org/elasticsearch/index/merge/scheduler/SerialMergeSchedulerProvider.java

@@ -84,9 +84,9 @@ public class SerialMergeSchedulerProvider extends MergeSchedulerProvider {
         }
 
         @Override
-        public void merge(IndexWriter writer) throws CorruptIndexException, IOException {
+        public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws CorruptIndexException, IOException {
             try {
-                super.merge(writer);
+                super.merge(writer, trigger, newMergesFound);
             } catch (Throwable e) {
                 logger.warn("failed to merge", e);
                 provider.failedMerge(new MergePolicy.MergeException(e, writer.getDirectory()));

+ 1 - 1
src/main/java/org/elasticsearch/index/search/child/ChildrenConstantScoreQuery.java

@@ -176,7 +176,7 @@ public class ChildrenConstantScoreQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             if (remaining == 0) {
                 return null;
             }

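Lucene 4.8 drops the scoreDocsInOrder/topScorer flags from Weight.scorer (bulk scoring moved to the separate BulkScorer API), so every custom Weight in the child/parent queries below changes the same way. A minimal consumer sketch under the new signature:

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.*;
import java.io.IOException;

// Walk the matches of a query segment by segment with the 4.8 signature.
static void collectMatches(IndexSearcher searcher, Query query) throws IOException {
    Weight weight = searcher.createNormalizedWeight(query);
    for (AtomicReaderContext leaf : searcher.getIndexReader().leaves()) {
        Scorer scorer = weight.scorer(leaf, leaf.reader().getLiveDocs());
        if (scorer == null) {
            continue; // no matches in this segment
        }
        while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int doc = scorer.docID(); // segment-relative doc id
        }
    }
}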
+ 1 - 1
src/main/java/org/elasticsearch/index/search/child/ChildrenQuery.java

@@ -265,7 +265,7 @@ public class ChildrenQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             DocIdSet parentsSet = parentFilter.getDocIdSet(context, acceptDocs);
             if (DocIdSets.isEmpty(parentsSet) || remaining == 0) {
                 return null;

+ 1 - 1
src/main/java/org/elasticsearch/index/search/child/CustomQueryWrappingFilter.java

@@ -74,7 +74,7 @@ public class CustomQueryWrappingFilter extends NoCacheFilter implements Releasab
                 final DocIdSet set = DocIdSets.toCacheable(leaf.reader(), new DocIdSet() {
                     @Override
                     public DocIdSetIterator iterator() throws IOException {
-                        return weight.scorer(leaf, true, false, null);
+                        return weight.scorer(leaf, null);
                     }
                     @Override
                     public boolean isCacheable() { return false; }

+ 1 - 1
src/main/java/org/elasticsearch/index/search/child/DeleteByQueryWrappingFilter.java

@@ -91,7 +91,7 @@ public class DeleteByQueryWrappingFilter extends Filter {
         return new DocIdSet() {
             @Override
             public DocIdSetIterator iterator() throws IOException {
-                return weight.scorer(context, true, false, acceptDocs);
+                return weight.scorer(context, acceptDocs);
             }
             @Override
             public boolean isCacheable() { return false; }

+ 1 - 1
src/main/java/org/elasticsearch/index/search/child/ParentConstantScoreQuery.java

@@ -154,7 +154,7 @@ public class ParentConstantScoreQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(context, acceptDocs);
             if (DocIdSets.isEmpty(childrenDocIdSet)) {
                 return null;

+ 1 - 1
src/main/java/org/elasticsearch/index/search/child/ParentQuery.java

@@ -245,7 +245,7 @@ public class ParentQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, acceptDocs);
             if (DocIdSets.isEmpty(childrenDocSet)) {
                 return null;

+ 1 - 1
src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java

@@ -327,7 +327,7 @@ public class TopChildrenQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
             ParentDoc[] readerParentDocs = parentDocs.v().get(context.reader().getCoreCacheKey());
             if (readerParentDocs != null) {
                 if (scoreType == ScoreType.MAX) {

+ 2 - 2
src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java

@@ -103,8 +103,8 @@ public class IncludeNestedDocsQuery extends Query {
         }
 
         @Override
-        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
-            final Scorer parentScorer = parentWeight.scorer(context, true, false, acceptDocs);
+        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+            final Scorer parentScorer = parentWeight.scorer(context, acceptDocs);
 
             // no matches
             if (parentScorer == null) {

+ 14 - 5
src/main/java/org/elasticsearch/index/store/Store.java

@@ -32,7 +32,6 @@ import org.elasticsearch.common.compress.Compressor;
 import org.elasticsearch.common.compress.CompressorFactory;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.lucene.Directories;
-import org.elasticsearch.common.lucene.store.ChecksumIndexOutput;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.index.CloseableIndexComponent;
@@ -46,6 +45,7 @@ import org.elasticsearch.index.store.support.ForceSyncDirectory;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.nio.file.NoSuchFileException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -53,6 +53,7 @@ import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.zip.Adler32;
+import java.util.zip.Checksum;
 
 /**
  */
@@ -145,7 +146,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
             } else {
                 try {
                     directory.deleteFile(file);
-                } catch (FileNotFoundException e) {
+                } catch (NoSuchFileException | FileNotFoundException e) {
                     // ignore
                 } catch (IOException e) {
                     lastException = e;
@@ -649,10 +650,13 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
             out.close();
             String checksum = null;
             IndexOutput underlying = out;
+            // TODO: cut over to lucene's CRC
+            // *WARNING*: lucene has classes in same o.a.l.store package with very similar names,
+            // but using CRC, not Adler!
             if (underlying instanceof BufferedChecksumIndexOutput) {
-                checksum = Long.toString(((BufferedChecksumIndexOutput) underlying).digest().getValue(), Character.MAX_RADIX);
-            } else if (underlying instanceof ChecksumIndexOutput) {
-                checksum = Long.toString(((ChecksumIndexOutput) underlying).digest().getValue(), Character.MAX_RADIX);
+                Checksum digest = ((BufferedChecksumIndexOutput) underlying).digest();
+                assert digest instanceof Adler32;
+                checksum = Long.toString(digest.getValue(), Character.MAX_RADIX);
             }
             synchronized (mutex) {
                 StoreFileMetaData md = new StoreFileMetaData(name, metaData.directory().fileLength(name), checksum, metaData.directory());
@@ -705,5 +709,10 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
         public String toString() {
             return out.toString();
         }
+
+        @Override
+        public long getChecksum() throws IOException {
+            return out.getChecksum();
+        }
     }
 }

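The legacy store checksum stays Adler32 (via BufferedChecksumIndexOutput), while Lucene 4.8's own file footers use CRC32, hence the warning about the similarly named o.a.l.store classes. A small sketch of how the checksum string above is encoded:

import java.util.zip.Adler32;
import java.util.zip.Checksum;

// Elasticsearch's legacy checksum format: the Adler32 value rendered in base 36.
static String legacyChecksum(Checksum digest) {
    assert digest instanceof Adler32; // Lucene's new footers use CRC32 instead
    return Long.toString(digest.getValue(), Character.MAX_RADIX);
}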
+ 13 - 23
src/main/java/org/elasticsearch/indices/analysis/HunspellService.java

@@ -21,23 +21,17 @@ package org.elasticsearch.indices.analysis;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
-import org.apache.lucene.analysis.hunspell.HunspellDictionary;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.hunspell.Dictionary;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 
 import java.io.*;
 import java.net.MalformedURLException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
+import java.util.*;
 
 /**
 * Serves as a node-level registry for hunspell dictionaries. This service expects all dictionaries to be located under
@@ -73,31 +67,28 @@ public class HunspellService extends AbstractComponent {
     private final static DictionaryFileFilter DIC_FILE_FILTER = new DictionaryFileFilter();
     private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
 
-    private final LoadingCache<String, HunspellDictionary> dictionaries;
-    private final Map<String, HunspellDictionary> knownDictionaries;
+    private final LoadingCache<String, Dictionary> dictionaries;
+    private final Map<String, Dictionary> knownDictionaries;
 
     private final boolean defaultIgnoreCase;
-    private final boolean defaultStrictAffixParsing;
     private final File hunspellDir;
 
     public HunspellService(final Settings settings, final Environment env) {
-        this(settings, env, Collections.<String, HunspellDictionary>emptyMap());
+        this(settings, env, Collections.<String, Dictionary>emptyMap());
     }
 
     @Inject
-    public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
+    public HunspellService(final Settings settings, final Environment env, final Map<String, Dictionary> knownDictionaries) {
         super(settings);
         this.knownDictionaries = knownDictionaries;
         this.hunspellDir = resolveHunspellDirectory(settings, env);
         this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
-        this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
-        final Version version = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
-        dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
+        dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, Dictionary>() {
             @Override
-            public HunspellDictionary load(String locale) throws Exception {
-                HunspellDictionary dictionary = knownDictionaries.get(locale);
+            public Dictionary load(String locale) throws Exception {
+                Dictionary dictionary = knownDictionaries.get(locale);
                 if (dictionary == null) {
-                    dictionary = loadDictionary(locale, settings, env, version);
+                    dictionary = loadDictionary(locale, settings, env);
                 }
                 return dictionary;
             }
@@ -110,7 +101,7 @@ public class HunspellService extends AbstractComponent {
      *
      * @param locale The name of the locale
      */
-    public HunspellDictionary getDictionary(String locale) {
+    public Dictionary getDictionary(String locale) {
         return dictionaries.getUnchecked(locale);
     }
 
@@ -147,7 +138,7 @@ public class HunspellService extends AbstractComponent {
      * @return The loaded Hunspell dictionary
      * @throws Exception when loading fails (due to IO errors or malformed dictionary files)
      */
-    private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
+    private Dictionary loadDictionary(String locale, Settings nodeSettings, Environment env) throws Exception {
         if (logger.isDebugEnabled()) {
             logger.debug("Loading hunspell dictionary [{}]...", locale);
         }
@@ -160,7 +151,6 @@ public class HunspellService extends AbstractComponent {
         nodeSettings = loadDictionarySettings(dicDir, nodeSettings.getByPrefix("indices.analysis.hunspell.dictionary." + locale + "."));
 
         boolean ignoreCase = nodeSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
-        boolean strictAffixParsing = nodeSettings.getAsBoolean("strict_affix_parsing", defaultStrictAffixParsing);
 
         File[] affixFiles = dicDir.listFiles(AFFIX_FILE_FILTER);
         if (affixFiles.length != 1) {
@@ -178,7 +168,7 @@ public class HunspellService extends AbstractComponent {
 
             affixStream = new FileInputStream(affixFiles[0]);
 
-            return new HunspellDictionary(affixStream, dicStreams, version, ignoreCase, strictAffixParsing);
+            return new Dictionary(affixStream, dicStreams, ignoreCase);
 
         } catch (Exception e) {
             logger.error("Could not load hunspell dictionary [{}]", e, locale);

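Lucene 4.8 replaces HunspellDictionary with the rewritten org.apache.lucene.analysis.hunspell.Dictionary, which takes neither a Version nor a strict-affix-parsing flag. A minimal loading sketch; the file names are assumptions:

import org.apache.lucene.analysis.hunspell.Dictionary;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Collections;

// Load one affix file plus one dictionary file, case-sensitive.
static Dictionary loadEnUs() throws Exception {
    try (InputStream affix = new FileInputStream("en_US.aff");
         InputStream dic = new FileInputStream("en_US.dic")) {
        return new Dictionary(affix, Collections.singletonList(dic), false);
    }
}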
+ 5 - 5
src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisModule.java

@@ -20,7 +20,7 @@
 package org.elasticsearch.indices.analysis;
 
 import com.google.common.collect.Maps;
-import org.apache.lucene.analysis.hunspell.HunspellDictionary;
+import org.apache.lucene.analysis.hunspell.Dictionary;
 import org.elasticsearch.common.inject.AbstractModule;
 import org.elasticsearch.common.inject.multibindings.MapBinder;
 
@@ -28,9 +28,9 @@ import java.util.Map;
 
 public class IndicesAnalysisModule extends AbstractModule {
 
-    private final Map<String, HunspellDictionary> hunspellDictionaries =  Maps.newHashMap();
+    private final Map<String, Dictionary> hunspellDictionaries =  Maps.newHashMap();
 
-    public void addHunspellDictionary(String lang, HunspellDictionary dictionary) {
+    public void addHunspellDictionary(String lang, Dictionary dictionary) {
         hunspellDictionaries.put(lang, dictionary);
     }
 
@@ -38,8 +38,8 @@ public class IndicesAnalysisModule extends AbstractModule {
     protected void configure() {
         bind(IndicesAnalysisService.class).asEagerSingleton();
 
-        MapBinder<String, HunspellDictionary> dictionariesBinder = MapBinder.newMapBinder(binder(), String.class, HunspellDictionary.class);
-        for (Map.Entry<String, HunspellDictionary> entry : hunspellDictionaries.entrySet()) {
+        MapBinder<String, Dictionary> dictionariesBinder = MapBinder.newMapBinder(binder(), String.class, Dictionary.class);
+        for (Map.Entry<String, Dictionary> entry : hunspellDictionaries.entrySet()) {
             dictionariesBinder.addBinding(entry.getKey()).toInstance(entry.getValue());
         }
         bind(HunspellService.class).asEagerSingleton();

+ 15 - 6
src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java

@@ -60,12 +60,21 @@ public enum PreBuiltTokenFilters {
     WORD_DELIMITER(CachingStrategy.ONE) {
         @Override
         public TokenStream create(TokenStream tokenStream, Version version) {
-            return new WordDelimiterFilter(tokenStream,
-                       WordDelimiterFilter.GENERATE_WORD_PARTS |
-                       WordDelimiterFilter.GENERATE_NUMBER_PARTS |
-                       WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
-                       WordDelimiterFilter.SPLIT_ON_NUMERICS |
-                       WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
+            if (version.luceneVersion.onOrAfter(org.apache.lucene.util.Version.LUCENE_48)) {
+                return new WordDelimiterFilter(version.luceneVersion, tokenStream,
+                           WordDelimiterFilter.GENERATE_WORD_PARTS |
+                           WordDelimiterFilter.GENERATE_NUMBER_PARTS |
+                           WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
+                           WordDelimiterFilter.SPLIT_ON_NUMERICS |
+                           WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
+            } else {
+                return new Lucene47WordDelimiterFilter(tokenStream,
+                           WordDelimiterFilter.GENERATE_WORD_PARTS |
+                           WordDelimiterFilter.GENERATE_NUMBER_PARTS |
+                           WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
+                           WordDelimiterFilter.SPLIT_ON_NUMERICS |
+                           WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
+            }
         }
     },
 

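WordDelimiterFilter's behavior (notably offsets) changed in 4.8, so the version-taking constructor selects the new semantics while Lucene47WordDelimiterFilter keeps the old ones for indexes created earlier. A usage sketch for the new path, with the flag set trimmed down:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.util.Version;

// New-style construction on a 4.8+ index; no protected-words set.
static TokenStream wordDelimited(TokenStream in) {
    return new WordDelimiterFilter(Version.LUCENE_48, in,
            WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS,
            null);
}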
+ 1 - 1
src/main/java/org/elasticsearch/indices/recovery/RecoveryTarget.java

@@ -303,7 +303,7 @@ public class RecoveryTarget extends AbstractComponent {
                 return;
             }
 
-            logger.trace("[{}][{}] recovery from [{}] failed", e, request.shardId().index().name(), request.shardId().id(), request.sourceNode());
+            logger.warn("[{}][{}] recovery from [{}] failed", e, request.shardId().index().name(), request.shardId().id(), request.sourceNode());
             listener.onRecoveryFailure(new RecoveryFailedException(request, e), true);
         }
     }

+ 0 - 1
src/main/java/org/elasticsearch/monitor/dump/AbstractDump.java

@@ -22,7 +22,6 @@ package org.elasticsearch.monitor.dump;
 import com.google.common.base.Charsets;
 import com.google.common.collect.ImmutableMap;
 import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.io.Streams;
 
 import java.io.*;
 import java.util.ArrayList;

+ 1 - 2
src/main/java/org/elasticsearch/monitor/dump/SimpleDump.java

@@ -22,7 +22,6 @@ package org.elasticsearch.monitor.dump;
 import org.elasticsearch.common.Nullable;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.util.Map;
 
 /**
@@ -32,7 +31,7 @@ public class SimpleDump extends AbstractDump {
 
     private final File location;
 
-    public SimpleDump(long timestamp, String cause, @Nullable Map<String, Object> context, File location) throws FileNotFoundException {
+    public SimpleDump(long timestamp, String cause, @Nullable Map<String, Object> context, File location)  {
         super(timestamp, cause, context);
         this.location = location;
     }

+ 1 - 7
src/main/java/org/elasticsearch/monitor/dump/SimpleDumpGenerator.java

@@ -25,7 +25,6 @@ import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.io.FileSystemUtils;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.util.ArrayList;
 import java.util.Map;
 
@@ -59,12 +58,7 @@ public class SimpleDumpGenerator implements DumpGenerator {
         }
         File file = new File(dumpLocation, fileName + cause + "-" + timestamp);
         FileSystemUtils.mkdirs(file);
-        SimpleDump dump;
-        try {
-            dump = new SimpleDump(System.currentTimeMillis(), cause, context, file);
-        } catch (FileNotFoundException e) {
-            throw new DumpGenerationFailedException("Failed to generate dump", e);
-        }
+        SimpleDump dump = new SimpleDump(System.currentTimeMillis(), cause, context, file);
         ArrayList<DumpContributionFailedException> failedContributors = new ArrayList<>();
         for (String name : contributors) {
             DumpContributor contributor = this.contributors.get(name);

+ 3 - 2
src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

@@ -52,6 +52,7 @@ import org.elasticsearch.snapshots.*;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.nio.file.NoSuchFileException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -366,7 +367,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent<Rep
         try {
             byte[] data = snapshotsBlobContainer.readBlobFully(metaDataBlobName(snapshotId));
             metaData = readMetaData(data);
-        } catch (FileNotFoundException ex) {
+        } catch (FileNotFoundException | NoSuchFileException ex) {
             throw new SnapshotMissingException(snapshotId, ex);
         } catch (IOException ex) {
             throw new SnapshotException(snapshotId, "failed to get snapshots", ex);
@@ -427,7 +428,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent<Rep
                     }
                 }
             }
-        } catch (FileNotFoundException ex) {
+        } catch (FileNotFoundException | NoSuchFileException ex) {
             throw new SnapshotMissingException(snapshotId, ex);
         } catch (IOException ex) {
             throw new SnapshotException(snapshotId, "failed to get snapshots", ex);

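Lucene 4.8's NIO-backed directories surface a missing file as java.nio.file.NoSuchFileException rather than FileNotFoundException, so both are now caught wherever "missing" is an expected outcome. The pattern in isolation:

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.NoSuchFileException;

// Returns null when the file is absent, whichever exception the directory throws.
static IndexInput openOrNull(Directory dir, String name) throws IOException {
    try {
        return dir.openInput(name, IOContext.READONCE);
    } catch (FileNotFoundException | NoSuchFileException e) {
        return null;
    }
}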
+ 20 - 52
src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java

@@ -22,8 +22,8 @@ import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.BytesRefArray;
+import org.apache.lucene.util.Counter;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.lucene.TopReaderContextAware;
 import org.elasticsearch.common.util.CollectionUtils;
@@ -326,43 +326,39 @@ public abstract class ValuesSource {
             }
 
             static class SortedUniqueBytesValues extends BytesValues {
-
                 final BytesValues delegate;
-                int[] sortedIds;
-                final BytesRefHash bytes;
+                int[] indices = new int[1]; // at least one
+                final BytesRefArray bytes;
                 int numUniqueValues;
                 int pos = Integer.MAX_VALUE;
 
                 public SortedUniqueBytesValues(BytesValues delegate) {
                     super(delegate.isMultiValued());
                     this.delegate = delegate;
-                    bytes = new BytesRefHash();
+                    bytes = new BytesRefArray(Counter.newCounter(false));
                 }
 
                 @Override
                 public int setDocument(int docId) {
                     final int numValues = delegate.setDocument(docId);
-                    if (numValues == 0) {
-                        sortedIds = null;
-                        return 0;
-                    }
-                    bytes.clear();
-                    bytes.reinit();
-                    for (int i = 0; i < numValues; ++i) {
-                        final BytesRef next = delegate.nextValue();
-                        final int hash = delegate.currentValueHash();
-                        assert hash == next.hashCode();
-                        bytes.add(next, hash);
-                    }
-                    numUniqueValues = bytes.size();
-                    sortedIds = bytes.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
+                    numUniqueValues = 0;
                     pos = 0;
+                    if (numValues > 0) {
+                        bytes.clear();
+                        indices = ArrayUtil.grow(this.indices, numValues);
+                        for (int i = 0; i < numValues; ++i) {
+                            final BytesRef next = delegate.nextValue();
+                            indices[i] = i;
+                            bytes.append(next);
+                        }
+                        numUniqueValues = CollectionUtils.sortAndDedup(bytes, indices);
+                    }
                     return numUniqueValues;
                 }
 
                 @Override
                 public BytesRef nextValue() {
-                    bytes.get(sortedIds[pos++], scratch);
+                    bytes.get(scratch, indices[pos++]);
                     return scratch;
                 }
 
@@ -657,21 +653,6 @@ public abstract class ValuesSource {
                 long[] array = new long[2];
                 int pos = Integer.MAX_VALUE;
 
-                final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
-                    @Override
-                    protected void swap(int i, int j) {
-                        final long tmp = array[i];
-                        array[i] = array[j];
-                        array[j] = tmp;
-                    }
-                    @Override
-                    protected int compare(int i, int j) {
-                        final long l1 = array[i];
-                        final long l2 = array[j];
-                        return Long.compare(l1, l2);
-                    }
-                };
-
                 protected SortedUniqueLongValues(LongValues delegate) {
                     super(delegate);
                 }
@@ -702,22 +683,9 @@ public abstract class ValuesSource {
 
             private static class SortedUniqueDoubleValues extends FilterDoubleValues {
 
-                int numUniqueValues;
-                double[] array = new double[2];
-                int pos = Integer.MAX_VALUE;
-
-                final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
-                    @Override
-                    protected void swap(int i, int j) {
-                        final double tmp = array[i];
-                        array[i] = array[j];
-                        array[j] = tmp;
-                    }
-                    @Override
-                    protected int compare(int i, int j) {
-                        return Double.compare(array[i], array[j]);
-                    }
-                };
+                private int numUniqueValues;
+                private double[] array = new double[2];
+                private int pos = Integer.MAX_VALUE;
 
                 SortedUniqueDoubleValues(DoubleValues delegate) {
                     super(delegate);

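The BytesRefHash-based dedup is replaced by an append-only BytesRefArray plus an index sort: CollectionUtils.sortAndDedup (added in this change) sorts the index array and returns the number of unique values. A toy sketch of the pattern, mirroring the new tests further down:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefArray;
import org.apache.lucene.util.Counter;
import org.elasticsearch.common.util.CollectionUtils;

BytesRefArray bytes = new BytesRefArray(Counter.newCounter(false));
bytes.append(new BytesRef("b"));
bytes.append(new BytesRef("a"));
bytes.append(new BytesRef("a"));
int[] indices = new int[] {0, 1, 2};
int unique = CollectionUtils.sortAndDedup(bytes, indices); // 2
BytesRef spare = new BytesRef();
bytes.get(spare, indices[0]); // "a" -- indices[0..unique-1] are sorted and unique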
+ 7 - 6
src/main/java/org/elasticsearch/search/facet/terms/strings/HashedAggregator.java

@@ -22,9 +22,10 @@ import com.carrotsearch.hppc.ObjectIntOpenHashMap;
 import com.google.common.collect.ImmutableList;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash;
 import org.elasticsearch.common.collect.BoundedTreeSet;
 import org.elasticsearch.common.lucene.HashedBytesRef;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BytesRefHash;
 import org.elasticsearch.index.fielddata.BytesValues;
 import org.elasticsearch.search.facet.InternalFacet;
 import org.elasticsearch.search.facet.terms.TermsFacet;
@@ -39,7 +40,7 @@ public class HashedAggregator {
     private final HashCount assertHash = getAssertHash();
 
     public HashedAggregator() {
-        hash = new BytesRefHashHashCount(new BytesRefHash());
+        hash = new BytesRefHashHashCount(new BytesRefHash(10, BigArrays.NON_RECYCLING_INSTANCE));
     }
 
     public void onDoc(int docId, BytesValues values) {
@@ -158,7 +159,7 @@ public class HashedAggregator {
 
         @Override
         public boolean add(BytesRef value, int hashCode, BytesValues values) {
-            int key = hash.add(value, hashCode);
+            int key = (int)hash.add(value, hashCode);
             if (key < 0) {
                 key = ((-key) - 1);
             } else if (key >= counts.length) {
@@ -168,7 +169,7 @@ public class HashedAggregator {
         }
 
         public boolean addNoCount(BytesRef value, int hashCode, BytesValues values) {
-            int key = hash.add(value, hashCode);
+            int key = (int)hash.add(value, hashCode);
             final boolean added = key >= 0;
             if (key < 0) {
                 key = ((-key) - 1);
@@ -190,7 +191,7 @@ public class HashedAggregator {
             private int currentCount = -1;
 
             BytesRefCountIteratorImpl() {
-                this.size = hash.size();
+                this.size = (int)hash.size();
             }
 
             public BytesRef next() {
@@ -220,7 +221,7 @@ public class HashedAggregator {
 
         @Override
         public int size() {
-            return hash.size();
+            return (int)hash.size();
         }
 
         @Override

+ 1 - 1
src/main/java/org/elasticsearch/search/scan/ScanContext.java

@@ -127,7 +127,7 @@ public class ScanContext {
 
         @Override
         public boolean acceptsDocsOutOfOrder() {
-            return true;
+            return false;
         }
 
         public static final RuntimeException StopCollectingException = new StopCollectingException();

+ 18 - 8
src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java

@@ -32,8 +32,11 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.fst.*;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
 import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
@@ -55,7 +58,9 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
 
     public static final String CODEC_NAME = "analyzing";
     public static final int CODEC_VERSION_START = 1;
-    public static final int CODEC_VERSION_LATEST = 2;
+    public static final int CODEC_VERSION_SERIALIZED_LABELS = 2;
+    public static final int CODEC_VERSION_CHECKSUMS = 3;
+    public static final int CODEC_VERSION_LATEST = CODEC_VERSION_CHECKSUMS;
 
     private boolean preserveSep;
     private boolean preservePositionIncrements;
@@ -89,10 +94,11 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
 
             @Override
             public void close() throws IOException {
-                try { /*
-                       * write the offsets per field such that we know where
-                       * we need to load the FSTs from
-                       */
+                try {
+                  /*
+                   * write the offsets per field such that we know where
+                   * we need to load the FSTs from
+                   */
                     long pointer = output.getFilePointer();
                     output.writeVInt(fieldOffsets.size());
                     for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
@@ -100,7 +106,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
                         output.writeVLong(entry.getValue());
                     }
                     output.writeLong(pointer);
-                    output.flush();
+                    CodecUtil.writeFooter(output);
                 } finally {
                     IOUtils.close(output);
                 }
@@ -202,8 +208,12 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
     public LookupFactory load(IndexInput input) throws IOException {
         long sizeInBytes = 0;
         int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
+        if (version >= CODEC_VERSION_CHECKSUMS) {
+            CodecUtil.checksumEntireFile(input);
+        }
+        final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS ? 8 + CodecUtil.footerLength() : 8);
         final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
-        input.seek(input.length() - 8);
+        input.seek(metaPointerPosition);
         long metaPointer = input.readLong();
         input.seek(metaPointer);
         int numFields = input.readVInt();

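Lucene 4.8 introduces end-to-end file checksums: writers finish with CodecUtil.writeFooter instead of a plain flush, readers can verify the whole file up front with CodecUtil.checksumEntireFile, and the trailing meta pointer now sits just before the footer rather than at the very end of the file. A self-contained write/verify sketch with a hypothetical codec name:

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.*;

static void roundTrip() throws java.io.IOException {
    try (Directory dir = new RAMDirectory()) {
        IndexOutput out = dir.createOutput("demo.bin", IOContext.DEFAULT);
        CodecUtil.writeHeader(out, "demo", 1);
        out.writeLong(42L);          // payload, standing in for the FST data
        CodecUtil.writeFooter(out);  // writes the CRC32 footer
        out.close();

        IndexInput in = dir.openInput("demo.bin", IOContext.READONCE);
        CodecUtil.checkHeader(in, "demo", 1, 1);
        CodecUtil.checksumEntireFile(in); // throws CorruptIndexException on corruption
        in.close();
    }
}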
+ 13 - 3
src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java

@@ -25,7 +25,10 @@ import org.apache.lucene.index.*;
 import org.apache.lucene.index.FilterAtomicReader.FilterTerms;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.store.IOContext.Context;
-import org.apache.lucene.store.*;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.ElasticsearchIllegalStateException;
@@ -54,6 +57,7 @@ public class Completion090PostingsFormat extends PostingsFormat {
 
     public static final String CODEC_NAME = "completion090";
     public static final int SUGGEST_CODEC_VERSION = 1;
+    public static final int SUGGEST_VERSION_CURRENT = SUGGEST_CODEC_VERSION;
     public static final String EXTENSION = "cmp";
 
     private final static ESLogger logger = Loggers.getLogger(Completion090PostingsFormat.class);
@@ -110,7 +114,7 @@ public class Completion090PostingsFormat extends PostingsFormat {
             boolean success = false;
             try {
                 output = state.directory.createOutput(suggestFSTFile, state.context);
-                CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_CODEC_VERSION);
+                CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_VERSION_CURRENT);
                 /*
                  * we write the delegate postings format name so we can load it
                  * without getting an instance in the ctor
@@ -206,11 +210,12 @@ public class Completion090PostingsFormat extends PostingsFormat {
 
         private final FieldsProducer delegateProducer;
         private final LookupFactory lookupFactory;
+        private final int version;
 
         public CompletionFieldsProducer(SegmentReadState state) throws IOException {
             String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
             IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
-            CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_CODEC_VERSION);
+            version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
             FieldsProducer delegateProducer = null;
             boolean success = false;
             try {
@@ -273,6 +278,11 @@ public class Completion090PostingsFormat extends PostingsFormat {
         public long ramBytesUsed() {
             return (lookupFactory == null ? 0 : lookupFactory.ramBytesUsed()) + delegateProducer.ramBytesUsed();
         }
+
+        @Override
+        public void checkIntegrity() throws IOException {
+            delegateProducer.checkIntegrity();
+        }
     }
 
     public static final class CompletionTerms extends FilterTerms {

+ 6 - 3
src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java

@@ -19,7 +19,10 @@
 package org.elasticsearch.search.suggest.completion;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.util.*;
 import org.apache.lucene.util.fst.Util;
 
@@ -128,8 +131,8 @@ public final class CompletionTokenStream extends TokenStream {
         private CharsRef charsRef;
 
         @Override
-        public int fillBytesRef() {
-            return bytes.hashCode();
+        public void fillBytesRef() {
+            // does nothing - we change in place
         }
 
         @Override

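TermToBytesRefAttribute.fillBytesRef() returns void in Lucene 4.8 (it no longer hands back a hash), so implementations that mutate their BytesRef in place can leave it empty. A consumer sketch under the new contract:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;

// Read each term as bytes; fillBytesRef() fills 'bytes' in place.
static void consume(TokenStream stream) throws IOException {
    TermToBytesRefAttribute termAtt = stream.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    stream.reset();
    while (stream.incrementToken()) {
        termAtt.fillBytesRef(); // no return value in 4.8
        // use 'bytes' here
    }
    stream.end();
    stream.close();
}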
+ 2 - 3
src/test/java/org/apache/lucene/TrackingSerialMergeSchedulerTests.java

@@ -30,7 +30,6 @@ import org.apache.lucene.index.TrackingSerialMergeScheduler;
 import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.index.merge.EnableMergeScheduler;
-import org.elasticsearch.index.merge.Merges;
 import org.elasticsearch.test.ElasticsearchLuceneTestCase;
 import org.junit.Test;
 
@@ -62,10 +61,10 @@ public class TrackingSerialMergeSchedulerTests extends ElasticsearchLuceneTestCa
         // 4 merge runs to work out through the pending merges
         for (int i = 0; i < 4; i++) {
             assertTrue(iw.hasPendingMerges());
-            Merges.maybeMerge(iw);
+            iw.maybeMerge();
             assertTrue(iw.hasPendingMerges());
         }
-        Merges.maybeMerge(iw);
+        iw.maybeMerge();
         assertFalse(iw.hasPendingMerges());
 
         iw.close(false);

+ 4 - 4
src/test/java/org/apache/lucene/queries/BlendedTermQueryTest.java

@@ -32,7 +32,7 @@ import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.TestUtil;
 import org.elasticsearch.test.ElasticsearchLuceneTestCase;
 import org.junit.Test;
 
@@ -176,9 +176,9 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
         for (int j = 0; j < iters; j++) {
             String[] fields = new String[1 + random().nextInt(10)];
             for (int i = 0; i < fields.length; i++) {
-                fields[i] = _TestUtil.randomRealisticUnicodeString(random(), 1, 10);
+                fields[i] = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
             }
-            String term = _TestUtil.randomRealisticUnicodeString(random(), 1, 10);
+            String term = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
             Term[] terms = toTerms(fields, term);
             boolean disableCoord = random().nextBoolean();
             boolean useBoolean = random().nextBoolean();
@@ -213,7 +213,7 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
         Set<Term> terms = new HashSet<>();
         int num = scaledRandomIntBetween(1, 10);
         for (int i = 0; i < num; i++) {
-            terms.add(new Term(_TestUtil.randomRealisticUnicodeString(random(), 1, 10), _TestUtil.randomRealisticUnicodeString(random(), 1, 10)));
+            terms.add(new Term(TestUtil.randomRealisticUnicodeString(random(), 1, 10), TestUtil.randomRealisticUnicodeString(random(), 1, 10)));
         }
 
         BlendedTermQuery blendedTermQuery = random().nextBoolean() ? BlendedTermQuery.dismaxBlendedQuery(terms.toArray(new Term[0]), random().nextFloat()) :

+ 1 - 2
src/test/java/org/elasticsearch/benchmark/fielddata/LongFieldDataBenchmark.java

@@ -37,7 +37,6 @@ import org.elasticsearch.index.fielddata.IndexNumericFieldData;
 import org.elasticsearch.index.mapper.ContentPath;
 import org.elasticsearch.index.mapper.Mapper.BuilderContext;
 import org.elasticsearch.index.mapper.core.LongFieldMapper;
-import org.elasticsearch.index.merge.Merges;
 import org.elasticsearch.indices.fielddata.breaker.DummyCircuitBreakerService;
 
 import java.util.Random;
@@ -142,7 +141,7 @@ public class LongFieldDataBenchmark {
                 }
                 indexWriter.addDocument(doc);
             }
-            Merges.forceMerge(indexWriter, 1);
+            indexWriter.forceMerge(1, true);
             indexWriter.close();
 
             final DirectoryReader dr = DirectoryReader.open(dir);

+ 1 - 2
src/test/java/org/elasticsearch/common/lucene/uid/VersionsTests.java

@@ -34,7 +34,6 @@ import org.elasticsearch.common.Numbers;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
 import org.elasticsearch.index.mapper.internal.VersionFieldMapper;
-import org.elasticsearch.index.merge.Merges;
 import org.elasticsearch.index.merge.policy.ElasticsearchMergePolicy;
 import org.elasticsearch.test.ElasticsearchLuceneTestCase;
 import org.hamcrest.MatcherAssert;
@@ -267,7 +266,7 @@ public class VersionsTests extends ElasticsearchLuceneTestCase {
                 .put("1", 0L).put("2", 0L).put("3", 0L).put("4", 4L).put("5", 5L).put("6", 6L).build();
 
         // Force merge and check versions
-        Merges.forceMerge(iw, 1);
+        iw.forceMerge(1, true);
         final AtomicReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(iw.getDirectory()));
         final NumericDocValues versions = ir.getNumericDocValues(VersionFieldMapper.NAME);
         assertThat(versions, notNullValue());

+ 5 - 5
src/test/java/org/elasticsearch/common/util/BytesRefHashTests.java

@@ -23,7 +23,7 @@ import com.carrotsearch.hppc.ObjectLongMap;
 import com.carrotsearch.hppc.ObjectLongOpenHashMap;
 import com.carrotsearch.hppc.cursors.ObjectLongCursor;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.TestUtil;
 import org.elasticsearch.test.ElasticsearchTestCase;
 import org.junit.Test;
 
@@ -100,7 +100,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
             for (int i = 0; i < 797; i++) {
                 String str;
                 do {
-                    str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
+                    str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
                 } while (str.length() == 0);
                 ref.copyChars(str);
                 long count = hash.size();
@@ -133,7 +133,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
             for (int i = 0; i < 797; i++) {
                 String str;
                 do {
-                    str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
+                    str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
                 } while (str.length() == 0);
                 ref.copyChars(str);
                 long count = hash.size();
@@ -173,7 +173,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
             for (int i = 0; i < 797; i++) {
                 String str;
                 do {
-                    str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
+                    str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
                 } while (str.length() == 0);
                 ref.copyChars(str);
                 long count = hash.size();
@@ -209,7 +209,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
             for (int i = 0; i < 797; i++) {
                 String str;
                 do {
-                    str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
+                    str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
                 } while (str.length() == 0);
                 ref.copyChars(str);
                 long count = hash.size();

+ 70 - 3
src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java

@@ -21,12 +21,16 @@ package org.elasticsearch.common.util;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefArray;
+import org.apache.lucene.util.Counter;
 import org.elasticsearch.test.ElasticsearchTestCase;
 import org.junit.Test;
 
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
+import java.util.*;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
 
 public class CollectionUtilsTests extends ElasticsearchTestCase {
 
@@ -61,4 +65,67 @@ public class CollectionUtilsTests extends ElasticsearchTestCase {
         }
     }
 
+    @Test
+    public void testSortAndDedupByteRefArray() {
+        SortedSet<BytesRef> set = new TreeSet<>();
+        final int numValues = scaledRandomIntBetween(0, 10000);
+        List<BytesRef> tmpList = new ArrayList<>();
+        BytesRefArray array = new BytesRefArray(Counter.newCounter());
+        for (int i = 0; i < numValues; i++) {
+            String s = randomRealisticUnicodeOfCodepointLengthBetween(1, 100);
+            set.add(new BytesRef(s));
+            tmpList.add(new BytesRef(s));
+            array.append(new BytesRef(s));
+        }
+        if (randomBoolean()) {
+            Collections.shuffle(tmpList, getRandom());
+            for (BytesRef ref : tmpList) {
+                array.append(ref);
+            }
+        }
+        int[] indices = new int[array.size()];
+        for (int i = 0; i < indices.length; i++) {
+            indices[i] = i;
+        }
+        int numUnique = CollectionUtils.sortAndDedup(array, indices);
+        assertThat(numUnique, equalTo(set.size()));
+        Iterator<BytesRef> iterator = set.iterator();
+
+        BytesRef spare = new BytesRef();
+        for (int i = 0; i < numUnique; i++) {
+            assertThat(iterator.hasNext(), is(true));
+            assertThat(array.get(spare, indices[i]), equalTo(iterator.next()));
+        }
+
+    }
+
+    @Test
+    public void testSortByteRefArray() {
+        List<BytesRef> values = new ArrayList<>();
+        final int numValues = scaledRandomIntBetween(0, 10000);
+        BytesRefArray array = new BytesRefArray(Counter.newCounter());
+        for (int i = 0; i < numValues; i++) {
+            String s = randomRealisticUnicodeOfCodepointLengthBetween(1, 100);
+            values.add(new BytesRef(s));
+            array.append(new BytesRef(s));
+        }
+        if (randomBoolean()) {
+            Collections.shuffle(values, getRandom());
+        }
+        int[] indices = new int[array.size()];
+        for (int i = 0; i < indices.length; i++) {
+            indices[i] = i;
+        }
+        CollectionUtils.sort(array, indices);
+        Collections.sort(values);
+        Iterator<BytesRef> iterator = values.iterator();
+
+        BytesRef spare = new BytesRef();
+        for (int i = 0; i < values.size(); i++) {
+            assertThat(iterator.hasNext(), is(true));
+            assertThat(array.get(spare, indices[i]), equalTo(iterator.next()));
+        }
+
+    }
+
 }
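
The two tests above drive CollectionUtils.sort and CollectionUtils.sortAndDedup through an index indirection: the BytesRefArray itself is never reordered, only the indices array is permuted (and, for sortAndDedup, the return value is the length of the unique prefix). A minimal sketch of such an indirect sort built on Lucene's InPlaceMergeSorter, shown as an illustration of the technique rather than the actual CollectionUtils code:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefArray;
    import org.apache.lucene.util.InPlaceMergeSorter;

    class IndirectSort {
        /** Permutes indices so that array.get(spare, indices[i]) ascends; the array stays untouched. */
        static void sort(final BytesRefArray array, final int[] indices) {
            final BytesRef left = new BytesRef();
            final BytesRef right = new BytesRef();
            new InPlaceMergeSorter() {
                @Override
                protected int compare(int i, int j) {
                    // compare the bytes the two indices point at (UTF-8 order)
                    return array.get(left, indices[i]).compareTo(array.get(right, indices[j]));
                }

                @Override
                protected void swap(int i, int j) {
                    // swap only the indirection, never the underlying bytes
                    final int tmp = indices[i];
                    indices[i] = indices[j];
                    indices[j] = tmp;
                }
            }.sort(0, indices.length);
        }
    }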

+ 220 - 0
src/test/java/org/elasticsearch/index/analysis/AnalysisFactoryTests.java

@@ -0,0 +1,220 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
+import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
+import org.elasticsearch.test.ElasticsearchTestCase;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * Alerts us if new analyzers are added to Lucene, so we don't miss them.
+ * <p>
+ * If we don't want to expose one for a specific reason, just map it to Void.
+ */
+public class AnalysisFactoryTests extends ElasticsearchTestCase {
+    
+    static final Map<String,Class<?>> KNOWN_TOKENIZERS = new HashMap<String,Class<?>>() {{
+        // deprecated ones, we don't care about these
+        put("arabicletter",  Deprecated.class);
+        put("chinese",       Deprecated.class);
+        put("cjk",           Deprecated.class);
+        put("russianletter", Deprecated.class);
+        
+        // exposed in ES
+        put("edgengram",     EdgeNGramTokenizerFactory.class);
+        put("keyword",       KeywordTokenizerFactory.class);
+        put("letter",        LetterTokenizerFactory.class);
+        put("lowercase",     LowerCaseTokenizerFactory.class);
+        put("ngram",         NGramTokenizerFactory.class);
+        put("pathhierarchy", PathHierarchyTokenizerFactory.class);
+        put("pattern",       PatternTokenizerFactory.class);
+        put("standard",      StandardTokenizerFactory.class);
+        put("uax29urlemail", UAX29URLEmailTokenizerFactory.class);
+        put("whitespace",    WhitespaceTokenizerFactory.class);
+                
+        // TODO: these tokenizers are not yet exposed: useful?
+        
+        // historical version of standardtokenizer... tries to recognize 
+        // company names and a few other things. not good for asian languages etc.
+        put("classic",       Void.class);
+        // we should add this, the thaiwordfilter is deprecated. this one has correct offsets
+        put("thai",          Void.class);
+        // this one "seems to mess up offsets". probably shouldn't be a tokenizer...
+        put("wikipedia",     Void.class);
+    }};
+    
+    public void testTokenizers() {
+        Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
+        missing.removeAll(KNOWN_TOKENIZERS.keySet());
+        assertTrue("new tokenizers found, please update KNOWN_TOKENIZERS: " + missing.toString(), missing.isEmpty());
+    }
+    
+    static final Map<String,Class<?>> KNOWN_TOKENFILTERS = new HashMap<String,Class<?>>() {{
+        // deprecated ones, we don't care about these
+        put("chinese",                Deprecated.class);
+        put("collationkey",           Deprecated.class);
+        put("position",               Deprecated.class);
+        put("thaiword",               Deprecated.class);
+
+        // exposed in ES
+        put("arabicnormalization",       ArabicNormalizationFilterFactory.class);
+        put("arabicstem",                ArabicStemTokenFilterFactory.class);
+        put("asciifolding",              ASCIIFoldingTokenFilterFactory.class);
+        put("brazilianstem",             BrazilianStemTokenFilterFactory.class);
+        put("bulgarianstem",             StemmerTokenFilterFactory.class);
+        put("cjkbigram",                 CJKBigramFilterFactory.class);
+        put("cjkwidth",                  CJKWidthFilterFactory.class);
+        put("commongrams",               CommonGramsTokenFilterFactory.class);
+        put("commongramsquery",          CommonGramsTokenFilterFactory.class);
+        put("czechstem",                 CzechStemTokenFilterFactory.class);
+        put("delimitedpayload",          DelimitedPayloadTokenFilterFactory.class);
+        put("dictionarycompoundword",    DictionaryCompoundWordTokenFilterFactory.class);
+        put("edgengram",                 EdgeNGramTokenFilterFactory.class);
+        put("elision",                   ElisionTokenFilterFactory.class);
+        put("englishminimalstem",        StemmerTokenFilterFactory.class);
+        put("englishpossessive",         StemmerTokenFilterFactory.class);
+        put("finnishlightstem",          StemmerTokenFilterFactory.class);
+        put("frenchlightstem",           StemmerTokenFilterFactory.class);
+        put("frenchminimalstem",         StemmerTokenFilterFactory.class);
+        put("germanstem",                GermanStemTokenFilterFactory.class);
+        put("germanlightstem",           StemmerTokenFilterFactory.class);
+        put("germanminimalstem",         StemmerTokenFilterFactory.class);
+        put("greeklowercase",            LowerCaseTokenFilterFactory.class);
+        put("greekstem",                 StemmerTokenFilterFactory.class);
+        put("hindistem",                 StemmerTokenFilterFactory.class);
+        put("hindistem",                 StemmerTokenFilterFactory.class);
+        put("hungarianlightstem",        StemmerTokenFilterFactory.class);
+        put("hunspellstem",              HunspellTokenFilterFactory.class);
+        put("hyphenationcompoundword",   HyphenationCompoundWordTokenFilterFactory.class);
+        put("indonesianstem",            StemmerTokenFilterFactory.class);
+        put("italianlightstem",          StemmerTokenFilterFactory.class);
+        put("keepword",                  KeepWordFilterFactory.class);
+        put("keywordmarker",             KeywordMarkerTokenFilterFactory.class);
+        put("kstem",                     KStemTokenFilterFactory.class);
+        put("latvianstem",               StemmerTokenFilterFactory.class);
+        put("length",                    LengthTokenFilterFactory.class);
+        put("limittokencount",           LimitTokenCountFilterFactory.class);
+        put("lowercase",                 LowerCaseTokenFilterFactory.class);
+        put("ngram",                     NGramTokenFilterFactory.class);
+        put("norwegianminimalstem",      StemmerTokenFilterFactory.class);
+        put("patterncapturegroup",       PatternCaptureGroupTokenFilterFactory.class);
+        put("patternreplace",            PatternReplaceTokenFilterFactory.class);
+        put("persiannormalization",      PersianNormalizationFilterFactory.class);
+        put("porterstem",                PorterStemTokenFilterFactory.class);
+        put("portugueselightstem",       StemmerTokenFilterFactory.class);
+        put("portugueseminimalstem",     StemmerTokenFilterFactory.class);
+        put("reversestring",             ReverseTokenFilterFactory.class);
+        put("russianlightstem",          StemmerTokenFilterFactory.class);
+        put("shingle",                   ShingleTokenFilterFactory.class);
+        put("snowballporter",            SnowballTokenFilterFactory.class);
+        put("spanishlightstem",          StemmerTokenFilterFactory.class);
+        put("standard",                  StandardTokenFilterFactory.class);
+        put("stemmeroverride",           StemmerOverrideTokenFilterFactory.class);
+        put("stop",                      StopTokenFilterFactory.class);
+        put("swedishlightstem",          StemmerTokenFilterFactory.class);
+        put("synonym",                   SynonymTokenFilterFactory.class);
+        put("trim",                      TrimTokenFilterFactory.class);
+        put("truncate",                  TruncateTokenFilterFactory.class);
+        put("turkishlowercase",          LowerCaseTokenFilterFactory.class);
+        put("worddelimiter",             WordDelimiterTokenFilterFactory.class);
+                
+        // TODO: these tokenfilters are not yet exposed: useful?
+        
+        // useful for turkish language
+        put("apostrophe",                Void.class);
+        // capitalizes tokens
+        put("capitalization",            Void.class);
+        // cleans up after classic tokenizer
+        put("classic",                   Void.class);
+        // like length filter (but codepoints)
+        put("codepointcount",            Void.class);
+        // galician language stemmers
+        put("galicianminimalstem",       Void.class);
+        put("galicianstem",              Void.class);
+        // o+umlaut=oe type normalization for german
+        put("germannormalization",       Void.class);
+        // hindi text normalization
+        put("hindinormalization",        Void.class);
+        // puts hyphenated words back together
+        put("hyphenatedwords",           Void.class);
+        // unicode normalization for indian languages
+        put("indicnormalization",        Void.class);
+        // lowercasing for irish: add to LowerCase (has a stemmer, too)
+        put("irishlowercase",            Void.class);
+        // repeats anything marked as keyword
+        put("keywordrepeat",             Void.class);
+        // like limittokencount, but by position
+        put("limittokenposition",        Void.class);
+        // sets a constant numeric payload on tokens of a given type
+        put("numericpayload",            Void.class);
+        // RSLP stemmer for portuguese
+        put("portuguesestem",            Void.class);
+        // light stemming for norwegian (has nb/nn options too)
+        put("norwegianlightstem",        Void.class);
+        // removes duplicates at the same position (this should be used by the existing factory)
+        put("removeduplicates",          Void.class);
+        // accent handling for scandinavian languages
+        put("scandinavianfolding",       Void.class);
+        // less aggressive accent handling for scandinavian languages
+        put("scandinaviannormalization", Void.class);
+        // kurdish language support
+        put("soraninormalization",       Void.class);
+        put("soranistem",                Void.class);
+        // stores each token's start/end offsets in its payload
+        put("tokenoffsetpayload",        Void.class);
+        // like a stop filter but by token-type
+        put("type",                      Void.class);
+        // puts the type into the payload
+        put("typeaspayload",             Void.class);
+        // opposite of lowercase...
+        put("uppercase",                 Void.class);
+    }};
+    
+    public void testTokenFilters() {
+        Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenFilterFactory.availableTokenFilters());
+        missing.removeAll(KNOWN_TOKENFILTERS.keySet());
+        assertTrue("new tokenfilters found, please update KNOWN_TOKENFILTERS: " + missing.toString(), missing.isEmpty());
+    }
+    
+    static final Map<String,Class<?>> KNOWN_CHARFILTERS = new HashMap<String,Class<?>>() {{        
+        // exposed in ES
+        put("htmlstrip",      HtmlStripCharFilterFactory.class);
+        put("mapping",        MappingCharFilterFactory.class);
+        put("patternreplace", PatternReplaceCharFilterFactory.class);
+                
+        // TODO: these charfilters are not yet exposed: useful?
+        // handling of zwnj for persian
+        put("persian",        Void.class);
+    }};
+    
+    public void testCharFilters() {
+        Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.CharFilterFactory.availableCharFilters());
+        missing.removeAll(KNOWN_CHARFILTERS.keySet());
+        assertTrue("new charfilters found, please update KNOWN_CHARFILTERS: " + missing.toString(), missing.isEmpty());
+    }
+}
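
The three checks in this test lean on Lucene's analysis SPI: availableTokenizers(), availableTokenFilters() and availableCharFilters() enumerate every factory name registered on the classpath, so a Lucene upgrade that ships a new factory fails the matching test until the map is updated. A small example of the same SPI used directly; "lowercase" is one of the keys above, and the luceneMatchVersion value is illustrative:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.analysis.util.TokenFilterFactory;

    class SpiLookupExample {
        public static void main(String[] args) {
            // the same lower-cased names used as map keys in KNOWN_TOKENFILTERS
            System.out.println(TokenFilterFactory.availableTokenFilters());

            // instantiate a factory by name; most factories require luceneMatchVersion
            Map<String, String> params = new HashMap<>();
            params.put("luceneMatchVersion", "4.8");
            TokenFilterFactory lowercase = TokenFilterFactory.forName("lowercase", params);
            System.out.println(lowercase.getClass().getSimpleName()); // LowerCaseFilterFactory
        }
    }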

+ 0 - 43
src/test/java/org/elasticsearch/index/analysis/HunspellTokenFilterFactoryTests.java

@@ -18,7 +18,6 @@
  */
 package org.elasticsearch.index.analysis;
 
-import org.elasticsearch.common.inject.ProvisionException;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.test.ElasticsearchTestCase;
 import org.junit.Test;
@@ -59,46 +58,4 @@ public class HunspellTokenFilterFactoryTests extends ElasticsearchTestCase {
         assertThat(hunspellTokenFilter.dedup(), is(false));
     }
 
-    @Test
-    public void testDefaultRecursionLevel() throws IOException {
-        Settings settings = settingsBuilder()
-                .put("path.conf", getResource("/indices/analyze/conf_dir"))
-                .put("index.analysis.filter.en_US.type", "hunspell")
-                .put("index.analysis.filter.en_US.locale", "en_US")
-                .build();
-
-        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
-        TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
-        assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
-        HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
-        assertThat(hunspellTokenFilter.recursionLevel(), is(2));
-    }
-
-    @Test
-    public void testCustomRecursionLevel() throws IOException {
-        Settings settings = settingsBuilder()
-                .put("path.conf", getResource("/indices/analyze/conf_dir"))
-                .put("index.analysis.filter.en_US.type", "hunspell")
-                .put("index.analysis.filter.en_US.recursion_level", 0)
-                .put("index.analysis.filter.en_US.locale", "en_US")
-                .build();
-
-        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
-        TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
-        assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
-        HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
-        assertThat(hunspellTokenFilter.recursionLevel(), is(0));
-    }
-
-    @Test(expected = ProvisionException.class)
-    public void negativeRecursionLevelShouldFail() throws IOException {
-        Settings settings = settingsBuilder()
-                .put("path.conf", getResource("/indices/analyze/conf_dir"))
-                .put("index.analysis.filter.en_US.type", "hunspell")
-                .put("index.analysis.filter.en_US.recursion_level", -1)
-                .put("index.analysis.filter.en_US.locale", "en_US")
-                .build();
-        AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
-    }
-
 }

+ 32 - 0
src/test/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactoryTests.java

@@ -125,4 +125,36 @@ public class WordDelimiterTokenFilterFactoryTests extends ElasticsearchTokenStre
         assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
     }
 
+    /** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */
+    @Test
+    public void testPartsAndCatenate() throws IOException {
+        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settingsBuilder()
+                .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
+                .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
+                .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
+                .build());
+        TokenFilterFactory tokenFilter = analysisService.tokenFilter("my_word_delimiter");
+        String source = "PowerShot";
+        String[] expected = new String[]{"Power", "PowerShot", "Shot" };
+        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(source));
+        assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
+    }
+
+    /** Back compat: 
+     * old offset order when doing both parts and concatenation: PowerShot is a synonym of Shot */
+    @Test
+    public void testDeprecatedPartsAndCatenate() throws IOException {
+        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settingsBuilder()
+                .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
+                .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
+                .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
+                .put("index.analysis.filter.my_word_delimiter.version", "4.7")
+                .build());
+        TokenFilterFactory tokenFilter = analysisService.tokenFilter("my_word_delimiter");
+        String source = "PowerShot";
+        String[] expected = new String[]{"Power", "Shot", "PowerShot" };
+        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(source));
+        assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
+    }
+
 }
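
The second test pins the filter to version 4.7 to get the old token order back. A sketch of the version gate such a factory can apply; the class and method names are made up for illustration, and the real branching lives in WordDelimiterTokenFilterFactory:

    import org.apache.lucene.util.Version;

    final class WordDelimiterOrder {
        /** Sketch only: true when the pre-4.8 order ("Power", "Shot", "PowerShot") should be kept. */
        static boolean useLegacyOrder(Version matchVersion) {
            return !matchVersion.onOrAfter(Version.LUCENE_48);
        }
    }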

+ 1 - 2
src/test/java/org/elasticsearch/index/codec/postingformat/DefaultPostingsFormatTests.java

@@ -32,7 +32,6 @@ import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat;
 import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-import org.elasticsearch.index.merge.Merges;
 import org.elasticsearch.test.ElasticsearchTestCase;
 import org.junit.Test;
 
@@ -94,7 +93,7 @@ public class DefaultPostingsFormatTests extends ElasticsearchTestCase {
         for (int i = 0; i < 100; i++) {
             writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES), new TextField("some_other_field", "1234", Store.YES)));
         }
-        Merges.forceMerge(writer, 1);
+        writer.forceMerge(1, true);
         writer.commit();
         
         DirectoryReader reader = DirectoryReader.open(writer, false);
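
This hunk (like the matching ones in the field data and suggest tests below) drops the removed org.elasticsearch.index.merge.Merges wrapper in favor of Lucene's own IndexWriter.forceMerge(int maxNumSegments, boolean doWait), where doWait=true blocks until the merge completes:

    // writer is the test's IndexWriter; plain Lucene, no wrapper needed
    writer.forceMerge(1, true); // merge down to a single segment and wait for it to finish
    writer.commit();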

+ 2 - 2
src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java

@@ -25,8 +25,8 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
 import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.TimeUnits;
-import org.apache.lucene.util._TestUtil;
 import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
 import org.elasticsearch.test.ElasticsearchThreadFilter;
 import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter;
@@ -42,7 +42,7 @@ public class ElasticsearchPostingsFormatTest extends BasePostingsFormatTestCase
 
     @Override
     protected Codec getCodec() {
-        return _TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat());
+        return TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat());
     }
     
 }

+ 5 - 10
src/test/java/org/elasticsearch/index/engine/internal/InternalEngineTests.java

@@ -31,7 +31,6 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexDeletionPolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.util.Version;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
@@ -322,9 +321,6 @@ public class InternalEngineTests extends ElasticsearchTestCase {
         assertThat(segments.get(2).isCompound(), equalTo(true));
     }
 
-    static {
-        assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "LUCENE-5481 is fixed, improve test below";
-    }
 
     @Test
     public void testSegmentsWithMergeFlag() throws Exception {
@@ -396,17 +392,16 @@ public class InternalEngineTests extends ElasticsearchTestCase {
         }
 
         // forcing an optimize will merge this single segment shard
-        // TODO: put a random boolean again once LUCENE-5481 is fixed
-        final boolean force = true; // randomBoolean();
-        waitTillMerge.set(new CountDownLatch(1));
-        waitForMerge.set(new CountDownLatch(1));
+        final boolean force = randomBoolean();
+        if (force) {
+            waitTillMerge.set(new CountDownLatch(1));
+            waitForMerge.set(new CountDownLatch(1));
+        }
         engine.optimize(new Engine.Optimize().flush(true).maxNumSegments(1).force(force).waitForMerge(false));
         waitTillMerge.get().await();
-
         for (Segment segment : engine.segments()) {
             assertThat(segment.getMergeId(), force ? notNullValue() : nullValue());
         }
-
         waitForMerge.get().countDown();
 
         engine.close();

+ 5 - 5
src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java

@@ -35,8 +35,8 @@ import org.apache.lucene.search.join.ScoreMode;
 import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util._TestUtil;
 import org.elasticsearch.common.lucene.search.NotFilter;
 import org.elasticsearch.common.lucene.search.XFilteredQuery;
 import org.elasticsearch.common.settings.ImmutableSettings;
@@ -225,7 +225,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
         d.add(s);
         final String[] values = new String[randomIntBetween(2, 30)];
         for (int i = 1; i < values.length; ++i) {
-            values[i] = _TestUtil.randomUnicodeString(getRandom());
+            values[i] = TestUtil.randomUnicodeString(getRandom());
         }
         final int numDocs = scaledRandomIntBetween(10, 10000);
         for (int i = 0; i < numDocs; ++i) {
@@ -283,7 +283,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
         d.add(s);
         final String[] values = new String[randomIntBetween(2, 10)];
         for (int i = 1; i < values.length; ++i) {
-            values[i] = _TestUtil.randomUnicodeString(getRandom());
+            values[i] = TestUtil.randomUnicodeString(getRandom());
         }
         final int numDocs = scaledRandomIntBetween(10, 10000);
         for (int i = 0; i < numDocs; ++i) {
@@ -335,7 +335,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
     public void testNestedSorting(MultiValueMode sortMode) throws IOException {
         final String[] values = new String[randomIntBetween(2, 20)];
         for (int i = 0; i < values.length; ++i) {
-            values[i] = _TestUtil.randomSimpleString(getRandom());
+            values[i] = TestUtil.randomSimpleString(getRandom());
         }
         final int numParents = scaledRandomIntBetween(10, 10000);
         List<Document> docs = new ArrayList<>();
@@ -379,7 +379,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
             missingValue = new BytesRef(RandomPicks.randomFrom(getRandom(), values));
             break;
         default:
-            missingValue = new BytesRef(_TestUtil.randomSimpleString(getRandom()));
+            missingValue = new BytesRef(TestUtil.randomSimpleString(getRandom()));
             break;
         }
         BytesRefFieldComparatorSource innerSource = new BytesRefFieldComparatorSource(fieldData, missingValue, sortMode);

+ 3 - 4
src/test/java/org/elasticsearch/index/fielddata/FilterFieldDataTest.java

@@ -26,7 +26,6 @@ import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.index.fielddata.AtomicFieldData.WithOrdinals;
 import org.elasticsearch.index.fielddata.ScriptDocValues.Strings;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
-import org.elasticsearch.index.merge.Merges;
 import org.junit.Test;
 
 import java.util.Random;
@@ -61,7 +60,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
             }
             writer.addDocument(d);
         }
-        Merges.forceMerge(writer, 1);
+        writer.forceMerge(1, true);
         AtomicReaderContext context = refreshReader();
         String[] formats = new String[] { "fst", "paged_bytes"};
         
@@ -158,8 +157,8 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
             }
             writer.addDocument(d);
         }
-        System.out.println(hundred + " " + ten + " " + five);
-        Merges.forceMerge(writer, 1);
+        logger.debug(hundred + " " + ten + " " + five);
+        writer.forceMerge(1, true);
         AtomicReaderContext context = refreshReader();
         String[] formats = new String[] { "fst", "paged_bytes"};
         for (String format : formats) {

+ 1 - 2
src/test/java/org/elasticsearch/index/fielddata/LongFieldDataTests.java

@@ -27,7 +27,6 @@ import org.apache.lucene.document.LongField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.Term;
 import org.elasticsearch.index.fielddata.plain.PackedArrayAtomicFieldData;
-import org.elasticsearch.index.merge.Merges;
 import org.joda.time.DateTimeZone;
 import org.junit.Test;
 
@@ -334,7 +333,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests {
             }
             writer.addDocument(doc);
         }
-        Merges.forceMerge(writer, 1);
+        writer.forceMerge(1, true);
 
         final IndexNumericFieldData indexFieldData = getForField("value");
         final AtomicNumericFieldData atomicFieldData = indexFieldData.load(refreshReader());

+ 17 - 16
src/test/java/org/elasticsearch/indices/analyze/HunspellServiceTests.java

@@ -18,9 +18,7 @@
  */
 package org.elasticsearch.indices.analyze;
 
-import org.apache.lucene.analysis.hunspell.HunspellDictionary;
-import org.apache.lucene.util.Version;
-import org.elasticsearch.common.lucene.Lucene;
+import org.apache.lucene.analysis.hunspell.Dictionary;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.indices.analysis.HunspellService;
@@ -28,7 +26,8 @@ import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
 import org.junit.Test;
 
-import static org.hamcrest.Matchers.equalTo;
+import java.lang.reflect.Field;
+
 import static org.hamcrest.Matchers.notNullValue;
 
 /**
@@ -46,11 +45,9 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
                 .build();
 
         cluster().startNode(settings);
-        HunspellDictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
+        Dictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
         assertThat(dictionary, notNullValue());
-        Version expectedVersion = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
-        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
-        assertThat(dictionary.isIgnoreCase(), equalTo(true));
+        assertIgnoreCase(true, dictionary);
     }
 
     @Test
@@ -64,18 +61,16 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
                 .build();
 
         cluster().startNode(settings);
-        HunspellDictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
+        Dictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
         assertThat(dictionary, notNullValue());
-        Version expectedVersion = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
-        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
-        assertThat(dictionary.isIgnoreCase(), equalTo(false));
+        assertIgnoreCase(false, dictionary);
 
 
         // testing that dictionary specific settings override node level settings
         dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US_custom");
         assertThat(dictionary, notNullValue());
-        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
-        assertThat(dictionary.isIgnoreCase(), equalTo(true));
+        assertIgnoreCase(true, dictionary);
     }
 
     @Test
@@ -85,8 +80,14 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
                 .build();
 
         cluster().startNode(settings);
-        HunspellDictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
+        Dictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
         assertThat(dictionary, notNullValue());
     }
-
+    
+    // TODO: open up a getter on Dictionary
+    private void assertIgnoreCase(boolean expected, Dictionary dictionary) throws Exception {
+        Field f = Dictionary.class.getDeclaredField("ignoreCase");
+        f.setAccessible(true);
+        assertEquals(expected, f.getBoolean(dictionary));
+    }
 }

+ 1 - 1
src/test/java/org/elasticsearch/indices/leaks/IndicesLeaksTests.java

@@ -44,7 +44,7 @@ public class IndicesLeaksTests extends ElasticsearchIntegrationTest {
 
     @SuppressWarnings({"ConstantConditions", "unchecked"})
     @Test
-    @BadApple
+    @BadApple(bugUrl = "https://github.com/elasticsearch/elasticsearch/issues/3232")
     public void testIndexShardLifecycleLeak() throws Exception {
 
         client().admin().indices().prepareCreate("test")

+ 29 - 2
src/test/java/org/elasticsearch/search/aggregations/support/FieldDataSourceTests.java

@@ -27,20 +27,29 @@ import org.elasticsearch.script.SearchScript;
 import org.elasticsearch.test.ElasticsearchTestCase;
 import org.junit.Test;
 
+import java.util.ArrayList;
 import java.util.Map;
 
+import static org.hamcrest.Matchers.lessThan;
+
 public class FieldDataSourceTests extends ElasticsearchTestCase {
 
     private static BytesValues randomBytesValues() {
         final boolean multiValued = randomBoolean();
+        final int maxLength = rarely() ? 3 : 10;
         return new BytesValues(multiValued) {
+            BytesRef previous;
             @Override
             public int setDocument(int docId) {
                 return randomInt(multiValued ? 10 : 1);
             }
             @Override
             public BytesRef nextValue() {
-                scratch.copyChars(randomAsciiOfLength(10));
+                if (previous != null && randomBoolean()) {
+                    scratch.copyBytes(previous);
+                } else {
+                    scratch.copyChars(randomAsciiOfLength(maxLength));
+                }
+                previous = BytesRef.deepCopyOf(scratch);
                 return scratch;
             }
 
@@ -103,7 +112,8 @@ public class FieldDataSourceTests extends ElasticsearchTestCase {
     }
 
     private static void assertConsistent(BytesValues values) {
-        for (int i = 0; i < 10; ++i) {
+        final int numDocs = scaledRandomIntBetween(10, 100);
+        for (int i = 0; i < numDocs; ++i) {
             final int valueCount = values.setDocument(i);
             for (int j = 0; j < valueCount; ++j) {
                 final BytesRef term = values.nextValue();
@@ -136,6 +146,23 @@ public class FieldDataSourceTests extends ElasticsearchTestCase {
     @Test
     public void sortedUniqueBytesValues() {
         assertConsistent(new ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues(randomBytesValues()));
+        assertSortedAndUnique(new ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues(randomBytesValues()));
+    }
+
+    private static void assertSortedAndUnique(BytesValues values) {
+        final int numDocs = scaledRandomIntBetween(10, 100);
+        ArrayList<BytesRef> ref = new ArrayList<BytesRef>();
+        for (int i = 0; i < numDocs; ++i) {
+            final int valueCount = values.setDocument(i);
+            ref.clear();
+            for (int j = 0; j < valueCount; ++j) {
+                final BytesRef term = values.nextValue();
+                if (j > 0) {
+                    assertThat(BytesRef.getUTF8SortedAsUnicodeComparator().compare(ref.get(ref.size() - 1), term), lessThan(0));
+                }
+                ref.add(values.copyShared());
+            }
+        }
     }
 
 }

+ 0 - 5
src/test/java/org/elasticsearch/search/child/SimpleChildQuerySearchTests.java

@@ -20,7 +20,6 @@ package org.elasticsearch.search.child;
 
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
-import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
 import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
 import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
@@ -565,10 +564,6 @@ public class SimpleChildQuerySearchTests extends ElasticsearchIntegrationTest {
         assertThat(searchResponse.getHits().getAt(0).sourceAsString(), containsString("\"p_value1_updated\""));
     }
 
-    static {
-        assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_47 : "See comments in testDfsSearchType";
-    }
-
     @Test
     public void testDfsSearchType() throws Exception {
         assertAcked(prepareCreate("test")

+ 1 - 1
src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java

@@ -167,7 +167,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
         SearchResponse search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com ").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
         assertHighlight(search, 0, "body", 0, startsWith("<em>Test: http://www.facebook.com</em>"));
         search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
-        assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: <em>http</em>://<em>www</em>.<em>facebook</em>.com <em>http</em>://<em>elasticsearch</em>.<em>org</em> <em>http</em>://<em>xing</em>.com <em>http</em>://<em>cnn</em>.com <em>http</em>://<em>quora</em>.com"));
+        assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: <em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> <em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
     }
     
     @Test

+ 2 - 2
src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java

@@ -21,8 +21,8 @@ package org.elasticsearch.search.sort;
 
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util._TestUtil;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchPhaseExecutionException;
@@ -128,7 +128,7 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
             String docId = Integer.toString(i);
             BytesRef ref = null;
             do {
-                ref = new BytesRef(_TestUtil.randomRealisticUnicodeString(random));
+                ref = new BytesRef(TestUtil.randomRealisticUnicodeString(random));
             } while (denseBytes.containsKey(ref));
             denseBytes.put(ref, docId);
             XContentBuilder src = jsonBuilder().startObject().field("dense_bytes", ref.utf8ToString());

+ 22 - 2
src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java

@@ -42,7 +42,6 @@ import org.elasticsearch.index.codec.postingsformat.PreBuiltPostingsFormatProvid
 import org.elasticsearch.index.mapper.FieldMapper.Names;
 import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
 import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
-import org.elasticsearch.index.merge.Merges;
 import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
 import org.elasticsearch.search.suggest.context.ContextMapping;
@@ -54,6 +53,7 @@ import java.lang.reflect.Field;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Set;
 
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
@@ -180,6 +180,16 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
                 return false;
             }
 
+            @Override
+            public Set<BytesRef> contexts() {
+                return null;
+            }
+
+            @Override
+            public boolean hasContexts() {
+                return false;
+            }
+
         };
         InputIterator iter;
         if (usePayloads) {
@@ -208,6 +218,16 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
                 public boolean hasPayloads() {
                     return true;
                 }
+                
+                @Override
+                public Set<BytesRef> contexts() {
+                    return null;
+                }
+
+                @Override
+                public boolean hasContexts() {
+                    return false;
+                }
             };
         } else {
             iter = primaryIter;
@@ -275,7 +295,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
             writer.addDocument(doc);
         }
         writer.commit();
-        Merges.forceMerge(writer, 1);
+        writer.forceMerge(1, true);
         writer.commit();
         DirectoryReader reader = DirectoryReader.open(writer, true);
         assertThat(reader.leaves().size(), equalTo(1));

+ 1 - 1
src/test/java/org/elasticsearch/test/TestCluster.java

@@ -765,7 +765,7 @@ public final class TestCluster extends ImmutableTestCluster {
         if (!dataDirToClean.isEmpty()) {
             boolean deleted = false;
             try {
-                deleted = FileSystemUtils.deleteRecursively(dataDirToClean.toArray(new File[dataDirToClean.size()]), false);
+                deleted = FileSystemUtils.deleteSubDirectories(dataDirToClean.toArray(new File[dataDirToClean.size()]));
             } finally {
                 logger.info("Wipe data directory for all nodes locations: {} success: {}", this.dataDirToClean, deleted);
                 this.dataDirToClean.clear();
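
TestCluster now wipes data directories with the new FileSystemUtils.deleteSubDirectories, which clears what lives under each directory while keeping the directory itself. A plausible shape for the helper, assuming it builds on the existing deleteRecursively; the body below is an assumption, and the authoritative version is in FileSystemUtils.java:

    import java.io.File;

    // sketch under assumptions; see FileSystemUtils.java for the real helper
    static boolean deleteSubDirectories(File... roots) {
        boolean deleted = false;
        for (File root : roots) {
            File[] children = root.listFiles(); // null if root is not a directory
            if (children != null) {
                for (File child : children) {
                    if (child.isDirectory()) {
                        deleted |= FileSystemUtils.deleteRecursively(child);
                    }
                }
            }
        }
        return deleted;
    }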