Browse Source

Disable bloom filters.

Make the "es090" postings format read-only, so that it exists only to read old segments. A test-only subclass adds write capability for use in tests.

Closes #8571
Robert Muir 11 years ago
parent
commit
9ef69f9f36
17 changed files with 99 additions and 360 deletions
  1. 0 36
      docs/reference/indices/update-settings.asciidoc
  2. 0 14
      src/main/java/org/elasticsearch/index/codec/CodecService.java
  3. 7 29
      src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java
  4. 2 0
      src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormatProvider.java
  5. 8 32
      src/main/java/org/elasticsearch/index/codec/postingsformat/Elasticsearch090PostingsFormat.java
  6. 2 7
      src/main/java/org/elasticsearch/index/codec/postingsformat/PostingFormats.java
  7. 1 9
      src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java
  8. 0 1
      src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java
  9. 0 6
      src/main/java/org/elasticsearch/index/store/Store.java
  10. 0 1
      src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
  11. 6 12
      src/test/java/org/elasticsearch/index/codec/CodecTests.java
  12. 0 122
      src/test/java/org/elasticsearch/index/codec/postingformat/DefaultPostingsFormatTests.java
  13. 69 0
      src/test/java/org/elasticsearch/index/codec/postingformat/Elasticsearch090RWPostingsFormat.java
  14. 1 3
      src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java
  15. 0 83
      src/test/java/org/elasticsearch/index/engine/internal/InternalEngineIntegrationTest.java
  16. 3 2
      src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java
  17. 0 3
      src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java

+ 0 - 36
docs/reference/indices/update-settings.asciidoc

@@ -57,10 +57,6 @@ settings API:
 `index.index_concurrency`::
     Defaults to `8`.
 
-`index.codec.bloom.load`::
-    Whether to load the bloom filter. Defaults to `false`.
-    See <<codec-bloom-load>>.
-
 `index.fail_on_merge_failure`::
     Default to `true`.
 
@@ -227,35 +223,3 @@ curl -XPUT 'localhost:9200/myindex/_settings' -d '{
 
 curl -XPOST 'localhost:9200/myindex/_open'
 --------------------------------------------------
-
-[float]
-[[codec-bloom-load]]
-=== Bloom filters
-
-Up to version 1.3, Elasticsearch used to generate bloom filters for the `_uid`
-field at indexing time and to load them at search time in order to speed-up
-primary-key lookups by savings disk seeks.
-
-As of 1.4, bloom filters are still generated at indexing time, but they are
-no longer loaded at search time by default: they consume RAM in proportion to
-the number of unique terms, which can quickly add up for certain use cases,
-and separate performance improvements have made the performance gains with
-bloom filters very small.
-
-[TIP]
-==================================================
-
-You can enable loading of the bloom filter at search time on a
-per-index basis by updating the index settings:
-
-[source,js]
---------------------------------------------------
-PUT /old_index/_settings?index.codec.bloom.load=true
---------------------------------------------------
-
-This setting, which defaults to `false`, can be updated on a live index. Note,
-however, that changing the value will cause the index to be reopened, which
-will invalidate any existing caches.
-
-==================================================
-

+ 0 - 14
src/main/java/org/elasticsearch/index/codec/CodecService.java

@@ -44,16 +44,11 @@ import org.elasticsearch.index.settings.IndexSettings;
  */
 public class CodecService extends AbstractIndexComponent {
 
-    public static final String INDEX_CODEC_BLOOM_LOAD = "index.codec.bloom.load";
-    public static final boolean INDEX_CODEC_BLOOM_LOAD_DEFAULT = false;
-
     private final PostingsFormatService postingsFormatService;
     private final DocValuesFormatService docValuesFormatService;
     private final MapperService mapperService;
     private final ImmutableMap<String, Codec> codecs;
 
-    private volatile boolean loadBloomFilter = true;
-
     public final static String DEFAULT_CODEC = "default";
 
     public CodecService(Index index) {
@@ -83,7 +78,6 @@ public class CodecService extends AbstractIndexComponent {
             codecs.put(codec, Codec.forName(codec));
         }
         this.codecs = codecs.immutableMap();
-        this.loadBloomFilter = indexSettings.getAsBoolean(INDEX_CODEC_BLOOM_LOAD, INDEX_CODEC_BLOOM_LOAD_DEFAULT);
     }
 
     public PostingsFormatService postingsFormatService() {
@@ -105,12 +99,4 @@ public class CodecService extends AbstractIndexComponent {
         }
         return codec;
     }
-
-    public boolean isLoadBloomFilter() {
-        return this.loadBloomFilter;
-    }
-
-    public void setLoadBloomFilter(boolean loadBloomFilter) {
-        this.loadBloomFilter = loadBloomFilter;
-    }
 }

+ 7 - 29
src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java

@@ -24,8 +24,6 @@ import org.apache.lucene.index.*;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.*;
 import org.elasticsearch.common.util.BloomFilter;
-import org.elasticsearch.index.store.DirectoryUtils;
-import org.elasticsearch.index.store.Store;
 
 import java.io.IOException;
 import java.util.*;
@@ -42,7 +40,9 @@ import java.util.Map.Entry;
  * This is a special bloom filter version, based on {@link org.elasticsearch.common.util.BloomFilter} and inspired
  * by Lucene {@link org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat}.
  * </p>
+ * @deprecated only for reading old segments
  */
+@Deprecated
 public final class BloomFilterPostingsFormat extends PostingsFormat {
 
     public static final String BLOOM_CODEC_NAME = "XBloomFilter"; // the Lucene one is named BloomFilter
@@ -160,30 +160,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
                 // // Load the hash function used in the BloomFilter
                 // hashFunction = HashFunction.forName(bloomIn.readString());
                 // Load the delegate postings format
-               final String delegatePostings = bloomIn
-                        .readString();
-                int numBlooms = bloomIn.readInt();
-
-                boolean load = false;
-                Store.StoreDirectory storeDir = DirectoryUtils.getStoreDirectory(state.directory);
-                if (storeDir != null && storeDir.codecService() != null) {
-                    load = storeDir.codecService().isLoadBloomFilter();
-                }
-
-                if (load) {
-                    for (int i = 0; i < numBlooms; i++) {
-                        int fieldNum = bloomIn.readInt();
-                        FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
-                        LazyBloomLoader loader = new LazyBloomLoader(bloomIn.getFilePointer(), dataInput);
-                        bloomsByFieldName.put(fieldInfo.name, loader);
-                        BloomFilter.skipBloom(bloomIn);
-                    }
-                    if (version >= BLOOM_CODEC_VERSION_CHECKSUM) {
-                        CodecUtil.checkFooter(bloomIn);
-                    } else {
-                        CodecUtil.checkEOF(bloomIn);
-                    }
-                }
+               final String delegatePostings = bloomIn.readString();
                 this.delegateFieldsProducer = PostingsFormat.forName(delegatePostings)
                         .fieldsProducer(state);
                 this.data = dataInput;
@@ -383,8 +360,9 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
 
     }
 
-
-    final class BloomFilteredFieldsConsumer extends FieldsConsumer {
+    // TODO: would be great to move this out to test code, but the interaction between es090 and bloom is complex
+    // at least it is not accessible via SPI
+    public final class BloomFilteredFieldsConsumer extends FieldsConsumer {
         private FieldsConsumer delegateFieldsConsumer;
         private Map<FieldInfo, BloomFilter> bloomFilters = new HashMap<>();
         private SegmentWriteState state;
@@ -399,7 +377,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
         }
 
         // for internal use only
-        FieldsConsumer getDelegate() {
+        public FieldsConsumer getDelegate() {
             return delegateFieldsConsumer;
         }
 

+ 2 - 0
src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormatProvider.java

@@ -30,7 +30,9 @@ import org.elasticsearch.index.settings.IndexSettings;
 import java.util.Map;
 
 /**
+ * @deprecated only for reading old segments
  */
+@Deprecated
 public class BloomFilterPostingsFormatProvider extends AbstractPostingsFormatProvider {
 
     private final PostingsFormatProvider delegate;

+ 8 - 32
src/main/java/org/elasticsearch/index/codec/postingsformat/Elasticsearch090PostingsFormat.java

@@ -38,14 +38,17 @@ import java.io.IOException;
 import java.util.Iterator;
 
 /**
- * This is the default postings format for Elasticsearch that special cases
+ * This is the old default postings format for Elasticsearch that special cases
  * the <tt>_uid</tt> field to use a bloom filter while all other fields
  * will use a {@link Lucene50PostingsFormat}. This format will reuse the underlying
  * {@link Lucene50PostingsFormat} and its files also for the <tt>_uid</tt> saving up to
  * 5 files per segment in the default case.
+ * <p>
+ * @deprecated only for reading old segments
  */
-public final class Elasticsearch090PostingsFormat extends PostingsFormat {
-    private final BloomFilterPostingsFormat bloomPostings;
+@Deprecated
+public class Elasticsearch090PostingsFormat extends PostingsFormat {
+    protected final BloomFilterPostingsFormat bloomPostings;
 
     public Elasticsearch090PostingsFormat() {
         super("es090");
@@ -57,7 +60,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat {
     public PostingsFormat getDefaultWrapped() {
         return bloomPostings.getDelegate();
     }
-    private static final Predicate<String> UID_FIELD_FILTER = new Predicate<String>() {
+    protected static final Predicate<String> UID_FIELD_FILTER = new Predicate<String>() {
 
         @Override
         public boolean apply(String s) {
@@ -67,34 +70,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat {
 
     @Override
     public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-        final BloomFilteredFieldsConsumer fieldsConsumer = bloomPostings.fieldsConsumer(state);
-        return new FieldsConsumer() {
-
-            @Override
-            public void write(Fields fields) throws IOException {
-
-                Fields maskedFields = new FilterLeafReader.FilterFields(fields) {
-                    @Override
-                    public Iterator<String> iterator() {
-                        return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER));
-                    }
-                };
-                fieldsConsumer.getDelegate().write(maskedFields);
-                maskedFields = new FilterLeafReader.FilterFields(fields) {
-                    @Override
-                    public Iterator<String> iterator() {
-                        return Iterators.singletonIterator(UidFieldMapper.NAME);
-                    }
-                };
-                // only go through bloom for the UID field
-                fieldsConsumer.write(maskedFields);
-            }
-
-            @Override
-            public void close() throws IOException {
-                fieldsConsumer.close();
-            }
-        };
+        throw new UnsupportedOperationException("this codec can only be used for reading");
     }
 
     @Override

+ 2 - 7
src/main/java/org/elasticsearch/index/codec/postingsformat/PostingFormats.java

@@ -30,10 +30,7 @@ import org.elasticsearch.common.util.BloomFilter;
  * This class represents the set of Elasticsearch "built-in"
  * {@link PostingsFormatProvider.Factory postings format factories}
  * <ul>
- * <li><b>bloom_default</b>: a postings format that uses a bloom filter to
- * improve term lookup performance. This is useful for primarily keys or fields
- * that are used as a delete key</li>
- * <li><b>default</b>: the default Elasticsearch postings format offering best
+ * <li><b>default</b>: the default Lucene postings format offering best
  * general purpose performance. This format is used if no postings format is
  * specified in the field mapping.</li>
  * <li><b>***</b>: other formats from Lucene core (e.g. Lucene41 as of Lucene 4.10)
@@ -51,12 +48,10 @@ public class PostingFormats {
         for (String luceneName : PostingsFormat.availablePostingsFormats()) {
             builtInPostingFormatsX.put(luceneName, new PreBuiltPostingsFormatProvider.Factory(PostingsFormat.forName(luceneName)));
         }
-        final PostingsFormat defaultFormat = new Elasticsearch090PostingsFormat();
+        final PostingsFormat defaultFormat = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
         builtInPostingFormatsX.put(PostingsFormatService.DEFAULT_FORMAT,
                                    new PreBuiltPostingsFormatProvider.Factory(PostingsFormatService.DEFAULT_FORMAT, defaultFormat));
 
-        builtInPostingFormatsX.put("bloom_default", new PreBuiltPostingsFormatProvider.Factory("bloom_default", wrapInBloom(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT))));
-
         builtInPostingFormats = builtInPostingFormatsX.immutableMap();
     }
 

+ 1 - 9
src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java

@@ -1522,12 +1522,10 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
             int indexConcurrency = settings.getAsInt(INDEX_INDEX_CONCURRENCY, InternalEngine.this.indexConcurrency);
             boolean failOnMergeFailure = settings.getAsBoolean(INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure);
             String codecName = settings.get(INDEX_CODEC, InternalEngine.this.codecName);
-            final boolean codecBloomLoad = settings.getAsBoolean(CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter());
             boolean requiresFlushing = false;
             if (indexConcurrency != InternalEngine.this.indexConcurrency ||
                     !codecName.equals(InternalEngine.this.codecName) ||
-                    failOnMergeFailure != InternalEngine.this.failOnMergeFailure ||
-                    codecBloomLoad != codecService.isLoadBloomFilter()) {
+                    failOnMergeFailure != InternalEngine.this.failOnMergeFailure) {
                 try (InternalLock _ = readLock.acquire()) {
                     if (indexConcurrency != InternalEngine.this.indexConcurrency) {
                         logger.info("updating index.index_concurrency from [{}] to [{}]", InternalEngine.this.indexConcurrency, indexConcurrency);
@@ -1545,12 +1543,6 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
                         logger.info("updating {} from [{}] to [{}]", InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure, failOnMergeFailure);
                         InternalEngine.this.failOnMergeFailure = failOnMergeFailure;
                     }
-                    if (codecBloomLoad != codecService.isLoadBloomFilter()) {
-                        logger.info("updating {} from [{}] to [{}]", CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter(), codecBloomLoad);
-                        codecService.setLoadBloomFilter(codecBloomLoad);
-                        // we need to flush in this case, to load/unload the bloom filters
-                        requiresFlushing = true;
-                    }
                 }
                 if (requiresFlushing) {
                     flush(new Flush().type(Flush.Type.NEW_WRITER));

+ 0 - 1
src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java

@@ -85,7 +85,6 @@ public class IndexDynamicSettingsModule extends AbstractModule {
         indexDynamicSettings.addDynamicSetting(LogDocMergePolicyProvider.INDEX_COMPOUND_FORMAT);
         indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_INDEX_CONCURRENCY, Validator.NON_NEGATIVE_INTEGER);
         indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_COMPOUND_ON_FLUSH, Validator.BOOLEAN);
-        indexDynamicSettings.addDynamicSetting(CodecService.INDEX_CODEC_BLOOM_LOAD, Validator.BOOLEAN);
         indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_GC_DELETES, Validator.TIME);
         indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_CODEC);
         indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE);

+ 0 - 6
src/main/java/org/elasticsearch/index/store/Store.java

@@ -550,12 +550,6 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
             return Store.this.shardId();
         }
 
-        @Nullable
-        public CodecService codecService() {
-            ensureOpen();
-            return Store.this.codecService;
-        }
-
         @Override
         public void close() throws IOException {
             assert false : "Nobody should close this directory except of the Store itself";

+ 0 - 1
src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat

@@ -1,3 +1,2 @@
-org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat
 org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat
 org.elasticsearch.search.suggest.completion.Completion090PostingsFormat

+ 6 - 12
src/test/java/org/elasticsearch/index/codec/CodecTests.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.codec;
 
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40Codec;
 import org.apache.lucene.codecs.lucene41.Lucene41Codec;
@@ -33,6 +34,7 @@ import org.apache.lucene.codecs.lucene49.Lucene49Codec;
 import org.apache.lucene.codecs.lucene50.Lucene50Codec;
 import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
@@ -80,25 +82,16 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
     public void testResolveDefaultPostingFormats() throws Exception {
         PostingsFormatService postingsFormatService = createCodecService().postingsFormatService();
         assertThat(postingsFormatService.get("default"), instanceOf(PreBuiltPostingsFormatProvider.class));
-        assertThat(postingsFormatService.get("default").get(), instanceOf(Elasticsearch090PostingsFormat.class));
+        PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
+        assertThat(postingsFormatService.get("default").get(), instanceOf(luceneDefault.getClass()));
 
         // Should fail when upgrading Lucene with codec changes
-        assertThat(((Elasticsearch090PostingsFormat)postingsFormatService.get("default").get()).getDefaultWrapped(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField("").getClass()));
         assertThat(postingsFormatService.get("Lucene41"), instanceOf(PreBuiltPostingsFormatProvider.class));
         // Should fail when upgrading Lucene with codec changes
         assertThat(postingsFormatService.get("Lucene50").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass()));
 
-        assertThat(postingsFormatService.get("bloom_default"), instanceOf(PreBuiltPostingsFormatProvider.class));
-        if (PostingFormats.luceneBloomFilter) {
-            assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilteringPostingsFormat.class));
-        } else {
-            assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilterPostingsFormat.class));
-        }
         assertThat(postingsFormatService.get("BloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class));
         assertThat(postingsFormatService.get("BloomFilter").get(), instanceOf(BloomFilteringPostingsFormat.class));
-
-        assertThat(postingsFormatService.get("XBloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class));
-        assertThat(postingsFormatService.get("XBloomFilter").get(), instanceOf(BloomFilterPostingsFormat.class));
     }
 
     @Test
@@ -128,7 +121,8 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
         CodecService codecService = createCodecService(indexSettings);
         DocumentMapper documentMapper = codecService.mapperService().documentMapperParser().parse(mapping);
         assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider(), instanceOf(PreBuiltPostingsFormatProvider.class));
-        assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Elasticsearch090PostingsFormat.class));
+        PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
+        assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(luceneDefault.getClass()));
 
         assertThat(documentMapper.mappers().name("field2").mapper().postingsFormatProvider(), instanceOf(DefaultPostingsFormatProvider.class));
         DefaultPostingsFormatProvider provider = (DefaultPostingsFormatProvider) documentMapper.mappers().name("field2").mapper().postingsFormatProvider();

+ 0 - 122
src/test/java/org/elasticsearch/index/codec/postingformat/DefaultPostingsFormatTests.java

@@ -1,122 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.codec.postingformat;
-
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50Codec;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.*;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMDirectory;
-import org.elasticsearch.common.lucene.Lucene;
-import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat;
-import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
-import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-import org.elasticsearch.test.ElasticsearchTestCase;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import static org.hamcrest.Matchers.*;
-
-/**
- * Simple smoke test for {@link org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat}
- */
-public class DefaultPostingsFormatTests extends ElasticsearchTestCase {
-
-    private final class TestCodec extends Lucene50Codec {
-
-        @Override
-        public PostingsFormat getPostingsFormatForField(String field) {
-            return new Elasticsearch090PostingsFormat();
-        }
-    }
-
-    @Test
-    public void testUseDefault() throws IOException {
-       
-        Codec codec = new TestCodec();
-        Directory d = new RAMDirectory();
-        IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
-        config.setCodec(codec);
-        IndexWriter writer = new IndexWriter(d, config);
-        writer.addDocument(Arrays.asList(new TextField("foo", "bar", Store.YES), new TextField(UidFieldMapper.NAME, "1234", Store.YES)));
-        writer.commit();
-        DirectoryReader reader = DirectoryReader.open(writer, false);
-        List<LeafReaderContext> leaves = reader.leaves();
-        assertThat(leaves.size(), equalTo(1));
-        LeafReader ar = leaves.get(0).reader();
-        Terms terms = ar.terms("foo");
-        Terms uidTerms = ar.terms(UidFieldMapper.NAME);
-
-        assertThat(terms.size(), equalTo(1l));
-        assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
-        assertThat(uidTerms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
-
-        reader.close();
-        writer.close();
-        d.close();
-    }
-    
-    @Test
-    public void testNoUIDField() throws IOException {
-       
-        Codec codec = new TestCodec();
-        Directory d = new RAMDirectory();
-        IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
-        config.setCodec(codec);
-        IndexWriter writer = new IndexWriter(d, config);
-        for (int i = 0; i < 100; i++) {
-            writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES), new TextField("some_other_field", "1234", Store.YES)));
-        }
-        writer.forceMerge(1, true);
-        writer.commit();
-        
-        DirectoryReader reader = DirectoryReader.open(writer, false);
-        List<LeafReaderContext> leaves = reader.leaves();
-        assertThat(leaves.size(), equalTo(1));
-        LeafReader ar = leaves.get(0).reader();
-        Terms terms = ar.terms("foo");
-        Terms some_other_field = ar.terms("some_other_field");
-
-        assertThat(terms.size(), equalTo(2l));
-        assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
-        assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
-        TermsEnum iterator = terms.iterator(null);
-        Set<String> expected = new HashSet<>();
-        expected.add("foo");
-        expected.add("bar");
-        while(iterator.next() != null) {
-            expected.remove(iterator.term().utf8ToString());
-        }
-        assertThat(expected.size(), equalTo(0));
-        reader.close();
-        writer.close();
-        d.close();
-    }
-
-}

+ 69 - 0
src/test/java/org/elasticsearch/index/codec/postingformat/Elasticsearch090RWPostingsFormat.java

@@ -0,0 +1,69 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.codec.postingformat;
+
+import com.google.common.base.Predicates;
+import com.google.common.collect.Iterators;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.SegmentWriteState;
+import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat.BloomFilteredFieldsConsumer;
+import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
+import org.elasticsearch.index.mapper.internal.UidFieldMapper;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+/** read-write version with blooms for testing */
+public class Elasticsearch090RWPostingsFormat extends Elasticsearch090PostingsFormat {
+    @Override
+    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+        final BloomFilteredFieldsConsumer fieldsConsumer = bloomPostings.fieldsConsumer(state);
+        return new FieldsConsumer() {
+
+            @Override
+            public void write(Fields fields) throws IOException {
+
+                Fields maskedFields = new FilterLeafReader.FilterFields(fields) {
+                    @Override
+                    public Iterator<String> iterator() {
+                        return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER));
+                    }
+                };
+                fieldsConsumer.getDelegate().write(maskedFields);
+                maskedFields = new FilterLeafReader.FilterFields(fields) {
+                    @Override
+                    public Iterator<String> iterator() {
+                        return Iterators.singletonIterator(UidFieldMapper.NAME);
+                    }
+                };
+                // only go through bloom for the UID field
+                fieldsConsumer.write(maskedFields);
+            }
+
+            @Override
+            public void close() throws IOException {
+                fieldsConsumer.close();
+            }
+        };
+    }
+}

+ 1 - 3
src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java

@@ -47,9 +47,7 @@ public class ElasticsearchPostingsFormatTest extends BasePostingsFormatTestCase
 
     @Override
     protected Codec getCodec() {
-        return random().nextBoolean() ?
-                TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat())
-                : TestUtil.alwaysPostingsFormat(new BloomFilterPostingsFormat(PostingsFormat.forName("Lucene50"), BloomFilter.Factory.DEFAULT));
+        return TestUtil.alwaysPostingsFormat(new Elasticsearch090RWPostingsFormat());
     }
     
 }

+ 0 - 83
src/test/java/org/elasticsearch/index/engine/internal/InternalEngineIntegrationTest.java

@@ -19,19 +19,12 @@
 
 package org.elasticsearch.index.engine.internal;
 
-import com.google.common.base.Predicate;
-import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.elasticsearch.action.admin.indices.segments.IndexSegments;
 import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
 import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse;
 import org.elasticsearch.action.admin.indices.segments.ShardSegments;
-import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
-import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.common.settings.ImmutableSettings;
-import org.elasticsearch.common.util.BloomFilter;
-import org.elasticsearch.index.codec.CodecService;
 import org.elasticsearch.index.engine.Segment;
-import org.elasticsearch.index.merge.policy.AbstractMergePolicyProvider;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.hamcrest.Matchers;
 import org.junit.Test;
@@ -39,85 +32,9 @@ import org.junit.Test;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Set;
-import java.util.concurrent.ExecutionException;
-
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 
 public class InternalEngineIntegrationTest extends ElasticsearchIntegrationTest {
 
-    @Test
-    @Slow
-    public void testSettingLoadBloomFilterDefaultTrue() throws Exception {
-        client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get();
-        client().prepareIndex("test", "foo").setSource("field", "foo").get();
-        ensureGreen();
-        refresh();
-        IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
-        final long segmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes();
-        logger.info("segments with bloom: {}", segmentsMemoryWithBloom);
-
-        logger.info("updating the setting to unload bloom filters");
-        client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
-        logger.info("waiting for memory to match without blooms");
-        awaitBusy(new Predicate<Object>() {
-            public boolean apply(Object o) {
-                IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
-                long segmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
-                logger.info("trying segments without bloom: {}", segmentsMemoryWithoutBloom);
-                return segmentsMemoryWithoutBloom == (segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes());
-            }
-        });
-
-        logger.info("updating the setting to load bloom filters");
-        client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true)).get();
-        logger.info("waiting for memory to match with blooms");
-        awaitBusy(new Predicate<Object>() {
-            public boolean apply(Object o) {
-                IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
-                long newSegmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes();
-                logger.info("trying segments with bloom: {}", newSegmentsMemoryWithBloom);
-                return newSegmentsMemoryWithBloom == segmentsMemoryWithBloom;
-            }
-        });
-    }
-
-    @Test
-    @Slow
-    public void testSettingLoadBloomFilterDefaultFalse() throws Exception {
-        client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1).put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
-        client().prepareIndex("test", "foo").setSource("field", "foo").get();
-        ensureGreen();
-        refresh();
-
-        IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
-        final long segmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
-        logger.info("segments without bloom: {}", segmentsMemoryWithoutBloom);
-
-        logger.info("updating the setting to load bloom filters");
-        client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true)).get();
-        logger.info("waiting for memory to match with blooms");
-        awaitBusy(new Predicate<Object>() {
-            public boolean apply(Object o) {
-                IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
-                long segmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes();
-                logger.info("trying segments with bloom: {}", segmentsMemoryWithoutBloom);
-                return segmentsMemoryWithoutBloom == (segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes());
-            }
-        });
-
-        logger.info("updating the setting to unload bloom filters");
-        client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
-        logger.info("waiting for memory to match without blooms");
-        awaitBusy(new Predicate<Object>() {
-            public boolean apply(Object o) {
-                IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
-                long newSegmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
-                logger.info("trying segments without bloom: {}", newSegmentsMemoryWithoutBloom);
-                return newSegmentsMemoryWithoutBloom == segmentsMemoryWithoutBloom;
-            }
-        });
-    }
-
     @Test
     public void testSetIndexCompoundOnFlush() {
         client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get();

+ 3 - 2
src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java

@@ -36,6 +36,7 @@ import org.apache.lucene.store.*;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LineFileDocs;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
 import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
@@ -70,7 +71,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
 
         IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
         LookupFactory load = currentProvider.load(input);
-        PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat());
+        PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT));
         NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer());
         Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING), new CompletionSuggestionContext(null));
         List<LookupResult> result = lookup.lookup("ge", false, 10);
@@ -214,7 +215,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
             iter = primaryIter;
         }
         reference.build(iter);
-        PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat());
+        PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT));
 
         NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer());
         final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads,

+ 0 - 3
src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java

@@ -466,9 +466,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
              builder.put(FsTranslog.INDEX_TRANSLOG_FS_TYPE, RandomPicks.randomFrom(random, FsTranslogFile.Type.values()).name());
         }
 
-        // Randomly load or don't load bloom filters:
-        builder.put(CodecService.INDEX_CODEC_BLOOM_LOAD, random.nextBoolean());
-
         if (random.nextBoolean()) {
             builder.put(IndicesQueryCache.INDEX_CACHE_QUERY_ENABLED, random.nextBoolean());
         }