1
0
Эх сурвалжийг харах

Add doc values support for ES 5 and ES 6 (#82207)

Adds support for reading doc values formats of ES 5 and 6.

Relates #81210
Yannick Welsch 3 жил өмнө
parent
commit
c55a4607e6
24 өөрчлөгдсөн 4483 нэмэгдсэн , 47 устгасан
  1. 0 2
      x-pack/plugin/old-lucene-versions/build.gradle
  2. 14 12
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/OldLuceneVersions.java
  3. 3 15
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java
  4. 42 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyBinaryDocValues.java
  5. 93 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyBinaryDocValuesWrapper.java
  6. 539 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyDocValuesIterables.java
  7. 42 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyNumericDocValues.java
  8. 99 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyNumericDocValuesWrapper.java
  9. 114 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedDocValues.java
  10. 104 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedDocValuesWrapper.java
  11. 53 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedNumericDocValues.java
  12. 102 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedNumericDocValuesWrapper.java
  13. 115 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedSetDocValues.java
  14. 115 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedSetDocValuesWrapper.java
  15. 72 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/LegacyStringHelper.java
  16. 842 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesConsumer.java
  17. 119 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesFormat.java
  18. 1847 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesProducer.java
  19. 16 1
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java
  20. 15 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java
  21. 14 0
      x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java
  22. 16 0
      x-pack/plugin/old-lucene-versions/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
  23. 22 0
      x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesFormatTests.java
  24. 85 17
      x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java

+ 0 - 2
x-pack/plugin/old-lucene-versions/build.gradle

@@ -13,6 +13,4 @@ dependencies {
   compileOnly project(path: xpackModule('core'))
 }
 
-test.enabled = false
-
 addQaCheckDependencies()

+ 14 - 12
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/OldLuceneVersions.java

@@ -71,6 +71,8 @@ public class OldLuceneVersions extends Plugin implements IndexStorePlugin {
 
     private static SegmentInfos convertToNewerLuceneVersion(OldSegmentInfos oldSegmentInfos) {
         final SegmentInfos segmentInfos = new SegmentInfos(org.apache.lucene.util.Version.LATEST.major);
+        segmentInfos.version = oldSegmentInfos.version;
+        segmentInfos.counter = oldSegmentInfos.counter;
         segmentInfos.setNextWriteGeneration(oldSegmentInfos.getGeneration() + 1);
         final Map<String, String> map = new HashMap<>(oldSegmentInfos.getUserData());
         if (map.containsKey(Engine.HISTORY_UUID_KEY) == false) {
@@ -85,21 +87,21 @@ public class OldLuceneVersions extends Plugin implements IndexStorePlugin {
         if (map.containsKey(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID) == false) {
             map.put(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, "-1");
         }
-        segmentInfos.setUserData(map, true);
+        segmentInfos.setUserData(map, false);
         for (SegmentCommitInfo infoPerCommit : oldSegmentInfos.asList()) {
             final SegmentInfo newInfo = BWCCodec.wrap(infoPerCommit.info);
-
-            segmentInfos.add(
-                new SegmentCommitInfo(
-                    newInfo,
-                    infoPerCommit.getDelCount(),
-                    infoPerCommit.getSoftDelCount(),
-                    infoPerCommit.getDelGen(),
-                    infoPerCommit.getFieldInfosGen(),
-                    infoPerCommit.getDocValuesGen(),
-                    infoPerCommit.getId()
-                )
+            final SegmentCommitInfo commitInfo = new SegmentCommitInfo(
+                newInfo,
+                infoPerCommit.getDelCount(),
+                infoPerCommit.getSoftDelCount(),
+                infoPerCommit.getDelGen(),
+                infoPerCommit.getFieldInfosGen(),
+                infoPerCommit.getDocValuesGen(),
+                infoPerCommit.getId()
             );
+            commitInfo.setDocValuesUpdatesFiles(infoPerCommit.getDocValuesUpdatesFiles());
+            commitInfo.setFieldInfosFiles(infoPerCommit.getFieldInfosFiles());
+            segmentInfos.add(commitInfo);
         }
         return segmentInfos;
     }

+ 3 - 15
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java

@@ -9,7 +9,6 @@ package org.elasticsearch.xpack.lucene.bwc.codecs;
 
 import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
@@ -20,7 +19,6 @@ import org.apache.lucene.codecs.PointsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
@@ -31,7 +29,6 @@ import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.elasticsearch.index.mapper.SeqNoFieldMapper;
 import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec;
 
 import java.io.IOException;
@@ -60,11 +57,6 @@ public abstract class BWCCodec extends Codec {
         throw new UnsupportedOperationException();
     }
 
-    @Override
-    public DocValuesFormat docValuesFormat() {
-        throw new UnsupportedOperationException();
-    }
-
     @Override
     public TermVectorsFormat termVectorsFormat() {
         throw new UnsupportedOperationException();
@@ -166,14 +158,10 @@ public abstract class BWCCodec extends Codec {
         };
     }
 
-    // mark all fields as having no postings, no doc values, and no points.
+    // mark all fields as having no postings, no term vectors, no norms, no payloads, no points, and no vectors.
     private static FieldInfos filterFields(FieldInfos fieldInfos) {
         List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
         for (FieldInfo fieldInfo : fieldInfos) {
-            // omit sequence number field so that it doesn't interfere with peer recovery
-            if (fieldInfo.name.equals(SeqNoFieldMapper.NAME)) {
-                continue;
-            }
             fieldInfoCopy.add(
                 new FieldInfo(
                     fieldInfo.name,
@@ -182,8 +170,8 @@ public abstract class BWCCodec extends Codec {
                     false,
                     false,
                     IndexOptions.NONE,
-                    DocValuesType.NONE,
-                    -1,
+                    fieldInfo.getDocValuesType(),
+                    fieldInfo.getDocValuesGen(),
                     fieldInfo.attributes(),
                     0,
                     0,

+ 42 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyBinaryDocValues.java

@@ -0,0 +1,42 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A per-document byte[]
+ *
+ * @deprecated Use {@link BinaryDocValues} instead.
+ */
+@Deprecated
+public abstract class LegacyBinaryDocValues {
+
+    /** Sole constructor. (For invocation by subclass
+     * constructors, typically implicit.) */
+    protected LegacyBinaryDocValues() {}
+
+    /** Lookup the value for document.  The returned {@link BytesRef} may be
+     * re-used across calls to {@link #get(int)} so make sure to
+     * {@link BytesRef#deepCopyOf(BytesRef) copy it} if you want to keep it
+     * around. */
+    public abstract BytesRef get(int docID);
+}

+ 93 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyBinaryDocValuesWrapper.java

@@ -0,0 +1,93 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Wraps a {@link LegacyBinaryDocValues} into a {@link BinaryDocValues}.
+ *
+ * @deprecated Implement {@link BinaryDocValues} directly.
+ */
+@Deprecated
+public final class LegacyBinaryDocValuesWrapper extends BinaryDocValues {
+    private final Bits docsWithField;
+    private final LegacyBinaryDocValues values;
+    private final int maxDoc;
+    private int docID = -1;
+
+    public LegacyBinaryDocValuesWrapper(Bits docsWithField, LegacyBinaryDocValues values) {
+        this.docsWithField = docsWithField;
+        this.values = values;
+        this.maxDoc = docsWithField.length();
+    }
+
+    @Override
+    public int docID() {
+        return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+        docID++;
+        while (docID < maxDoc) {
+            if (docsWithField.get(docID)) {
+                return docID;
+            }
+            docID++;
+        }
+        docID = NO_MORE_DOCS;
+        return NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) {
+        if (target < docID) {
+            throw new IllegalArgumentException("cannot advance backwards: docID=" + docID + " target=" + target);
+        }
+        if (target == NO_MORE_DOCS) {
+            this.docID = NO_MORE_DOCS;
+        } else {
+            this.docID = target - 1;
+            nextDoc();
+        }
+        return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+        docID = target;
+        return docsWithField.get(target);
+    }
+
+    @Override
+    public long cost() {
+        return 0;
+    }
+
+    @Override
+    public BytesRef binaryValue() {
+        return values.get(docID);
+    }
+}

+ 539 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyDocValuesIterables.java

@@ -0,0 +1,539 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+/** Bridge helper methods for legacy codecs to map sorted doc values to iterables. */
+
+public class LegacyDocValuesIterables {
+
+    private LegacyDocValuesIterables() {
+        // no
+    }
+
+    /** Converts {@link SortedDocValues} into an {@code Iterable&lt;BytesRef&gt;} for all the values.
+     *
+     * @deprecated Consume {@link SortedDocValues} instead. */
+    @Deprecated
+    public static Iterable<BytesRef> valuesIterable(final SortedDocValues values) {
+        return new Iterable<BytesRef>() {
+            @Override
+            public Iterator<BytesRef> iterator() {
+                return new Iterator<BytesRef>() {
+                    private int nextOrd;
+
+                    @Override
+                    public boolean hasNext() {
+                        return nextOrd < values.getValueCount();
+                    }
+
+                    @Override
+                    public BytesRef next() {
+                        try {
+                            return values.lookupOrd(nextOrd++);
+                        } catch (IOException e) {
+                            throw new RuntimeException(e);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts {@link SortedSetDocValues} into an {@code Iterable&lt;BytesRef&gt;} for all the values.
+     *
+     * @deprecated Consume {@link SortedSetDocValues} instead. */
+    @Deprecated
+    public static Iterable<BytesRef> valuesIterable(final SortedSetDocValues values) {
+        return new Iterable<BytesRef>() {
+            @Override
+            public Iterator<BytesRef> iterator() {
+                return new Iterator<BytesRef>() {
+                    private long nextOrd;
+
+                    @Override
+                    public boolean hasNext() {
+                        return nextOrd < values.getValueCount();
+                    }
+
+                    @Override
+                    public BytesRef next() {
+                        try {
+                            return values.lookupOrd(nextOrd++);
+                        } catch (IOException e) {
+                            throw new RuntimeException(e);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts {@link SortedDocValues} into the ord for each document as an {@code Iterable&lt;Number&gt;}.
+     *
+     * @deprecated Consume {@link SortedDocValues} instead. */
+    @Deprecated
+    public static Iterable<Number> sortedOrdIterable(final DocValuesProducer valuesProducer, FieldInfo fieldInfo, int maxDoc) {
+        return new Iterable<Number>() {
+            @Override
+            public Iterator<Number> iterator() {
+
+                final SortedDocValues values;
+                try {
+                    values = valuesProducer.getSorted(fieldInfo);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<Number>() {
+                    private int nextDocID;
+
+                    @Override
+                    public boolean hasNext() {
+                        return nextDocID < maxDoc;
+                    }
+
+                    @Override
+                    public Number next() {
+                        try {
+                            if (nextDocID > values.docID()) {
+                                values.nextDoc();
+                            }
+                            int result;
+                            if (nextDocID == values.docID()) {
+                                result = values.ordValue();
+                            } else {
+                                result = -1;
+                            }
+                            nextDocID++;
+                            return result;
+                        } catch (IOException ioe) {
+                            throw new RuntimeException(ioe);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts number-of-ords per document from {@link SortedSetDocValues} into {@code Iterable&lt;Number&gt;}.
+     *
+     * @deprecated Consume {@link SortedSetDocValues} instead. */
+    @Deprecated
+    public static Iterable<Number> sortedSetOrdCountIterable(
+        final DocValuesProducer valuesProducer,
+        final FieldInfo fieldInfo,
+        final int maxDoc
+    ) {
+
+        return new Iterable<Number>() {
+
+            @Override
+            public Iterator<Number> iterator() {
+
+                final SortedSetDocValues values;
+                try {
+                    values = valuesProducer.getSortedSet(fieldInfo);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<Number>() {
+                    private int nextDocID;
+                    private int ordCount;
+
+                    @Override
+                    public boolean hasNext() {
+                        return nextDocID < maxDoc;
+                    }
+
+                    @Override
+                    public Number next() {
+                        try {
+                            if (nextDocID > values.docID()) {
+                                if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+                                    ordCount = 0;
+                                    while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
+                                        ordCount++;
+                                    }
+                                }
+                            }
+                            int result;
+                            if (nextDocID == values.docID()) {
+                                result = ordCount;
+                            } else {
+                                result = 0;
+                            }
+                            nextDocID++;
+                            return result;
+                        } catch (IOException ioe) {
+                            throw new RuntimeException(ioe);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts all concatenated ords (in docID order) from {@link SortedSetDocValues} into {@code Iterable&lt;Number&gt;}.
+     *
+     * @deprecated Consume {@link SortedSetDocValues} instead. */
+    @Deprecated
+    public static Iterable<Number> sortedSetOrdsIterable(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {
+
+        return new Iterable<Number>() {
+
+            @Override
+            public Iterator<Number> iterator() {
+
+                final SortedSetDocValues values;
+                try {
+                    values = valuesProducer.getSortedSet(fieldInfo);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<Number>() {
+                    private boolean nextIsSet;
+                    private long nextOrd;
+
+                    private void setNext() {
+                        try {
+                            if (nextIsSet == false) {
+                                if (values.docID() == -1) {
+                                    values.nextDoc();
+                                }
+                                while (true) {
+                                    if (values.docID() == DocIdSetIterator.NO_MORE_DOCS) {
+                                        nextOrd = -1;
+                                        break;
+                                    }
+                                    nextOrd = values.nextOrd();
+                                    if (nextOrd != -1) {
+                                        break;
+                                    }
+                                    values.nextDoc();
+                                }
+                                nextIsSet = true;
+                            }
+                        } catch (IOException ioe) {
+                            throw new RuntimeException(ioe);
+                        }
+                    }
+
+                    @Override
+                    public boolean hasNext() {
+                        setNext();
+                        return nextOrd != -1;
+                    }
+
+                    @Override
+                    public Number next() {
+                        setNext();
+                        assert nextOrd != -1;
+                        nextIsSet = false;
+                        return nextOrd;
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts number-of-values per document from {@link SortedNumericDocValues} into {@code Iterable&lt;Number&gt;}.
+     *
+     * @deprecated Consume {@link SortedNumericDocValues} instead. */
+    @Deprecated
+    public static Iterable<Number> sortedNumericToDocCount(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo, int maxDoc) {
+        return new Iterable<Number>() {
+
+            @Override
+            public Iterator<Number> iterator() {
+
+                final SortedNumericDocValues values;
+                try {
+                    values = valuesProducer.getSortedNumeric(fieldInfo);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<Number>() {
+                    private int nextDocID;
+
+                    @Override
+                    public boolean hasNext() {
+                        return nextDocID < maxDoc;
+                    }
+
+                    @Override
+                    public Number next() {
+                        try {
+                            if (nextDocID > values.docID()) {
+                                values.nextDoc();
+                            }
+                            int result;
+                            if (nextDocID == values.docID()) {
+                                result = values.docValueCount();
+                            } else {
+                                result = 0;
+                            }
+                            nextDocID++;
+                            return result;
+                        } catch (IOException ioe) {
+                            throw new RuntimeException(ioe);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts all concatenated values (in docID order) from {@link SortedNumericDocValues} into {@code Iterable&lt;Number&gt;}.
+     *
+     * @deprecated Consume {@link SortedNumericDocValues} instead. */
+    @Deprecated
+    public static Iterable<Number> sortedNumericToValues(final DocValuesProducer valuesProducer, final FieldInfo fieldInfo) {
+        return new Iterable<Number>() {
+
+            @Override
+            public Iterator<Number> iterator() {
+
+                final SortedNumericDocValues values;
+                try {
+                    values = valuesProducer.getSortedNumeric(fieldInfo);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<Number>() {
+                    private boolean nextIsSet;
+                    private int nextCount;
+                    private int upto;
+                    private long nextValue;
+
+                    private void setNext() {
+                        try {
+                            if (nextIsSet == false) {
+                                if (upto == nextCount) {
+                                    values.nextDoc();
+                                    if (values.docID() == DocIdSetIterator.NO_MORE_DOCS) {
+                                        nextCount = 0;
+                                        nextIsSet = false;
+                                        return;
+                                    } else {
+                                        nextCount = values.docValueCount();
+                                    }
+                                    upto = 0;
+                                }
+                                nextValue = values.nextValue();
+                                upto++;
+                                nextIsSet = true;
+                            }
+                        } catch (IOException ioe) {
+                            throw new RuntimeException(ioe);
+                        }
+                    }
+
+                    @Override
+                    public boolean hasNext() {
+                        setNext();
+                        return nextCount != 0;
+                    }
+
+                    @Override
+                    public Number next() {
+                        setNext();
+                        assert nextCount != 0;
+                        nextIsSet = false;
+                        return nextValue;
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts norms into {@code Iterable&lt;Number&gt;}.
+     *
+     * @deprecated Consume {@link NumericDocValues} instead. */
+    @Deprecated
+    public static Iterable<Number> normsIterable(final FieldInfo field, final NormsProducer normsProducer, final int maxDoc) {
+
+        return new Iterable<Number>() {
+
+            @Override
+            public Iterator<Number> iterator() {
+
+                final NumericDocValues values;
+                try {
+                    values = normsProducer.getNorms(field);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<Number>() {
+                    private int docIDUpto = -1;
+
+                    @Override
+                    public boolean hasNext() {
+                        return docIDUpto + 1 < maxDoc;
+                    }
+
+                    @Override
+                    public Number next() {
+                        docIDUpto++;
+                        if (docIDUpto > values.docID()) {
+                            try {
+                                values.nextDoc();
+                            } catch (IOException ioe) {
+                                throw new RuntimeException(ioe);
+                            }
+                        }
+                        Number result;
+                        if (docIDUpto == values.docID()) {
+                            try {
+                                result = values.longValue();
+                            } catch (IOException ioe) {
+                                throw new RuntimeException(ioe);
+                            }
+                        } else {
+                            // Unlike NumericDocValues, norms used to return 0 for missing values:
+                            result = 0;
+                        }
+                        return result;
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts values from {@link BinaryDocValues} into {@code Iterable&lt;BytesRef&gt;}.
+     *
+     * @deprecated Consume {@link BinaryDocValues} instead. */
+    @Deprecated
+    public static Iterable<BytesRef> binaryIterable(final FieldInfo field, final DocValuesProducer valuesProducer, final int maxDoc) {
+        return new Iterable<BytesRef>() {
+            @Override
+            public Iterator<BytesRef> iterator() {
+
+                final BinaryDocValues values;
+                try {
+                    values = valuesProducer.getBinary(field);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<BytesRef>() {
+                    private int docIDUpto = -1;
+
+                    @Override
+                    public boolean hasNext() {
+                        return docIDUpto + 1 < maxDoc;
+                    }
+
+                    @Override
+                    public BytesRef next() {
+                        docIDUpto++;
+                        if (docIDUpto > values.docID()) {
+                            try {
+                                values.nextDoc();
+                            } catch (IOException ioe) {
+                                throw new RuntimeException(ioe);
+                            }
+                        }
+                        BytesRef result;
+                        if (docIDUpto == values.docID()) {
+                            try {
+                                result = values.binaryValue();
+                            } catch (IOException e) {
+                                throw new RuntimeException(e);
+                            }
+                        } else {
+                            result = null;
+                        }
+                        return result;
+                    }
+                };
+            }
+        };
+    }
+
+    /** Converts values from {@link NumericDocValues} into {@code Iterable&lt;Number&gt;}.
+     *
+     * @deprecated Consume {@link NumericDocValues} instead. */
+    @Deprecated
+    public static Iterable<Number> numericIterable(final FieldInfo field, final DocValuesProducer valuesProducer, final int maxDoc) {
+        return new Iterable<Number>() {
+            @Override
+            public Iterator<Number> iterator() {
+
+                final NumericDocValues values;
+                try {
+                    values = valuesProducer.getNumeric(field);
+                } catch (IOException ioe) {
+                    throw new RuntimeException(ioe);
+                }
+
+                return new Iterator<Number>() {
+                    private int docIDUpto = -1;
+
+                    @Override
+                    public boolean hasNext() {
+                        return docIDUpto + 1 < maxDoc;
+                    }
+
+                    @Override
+                    public Number next() {
+                        docIDUpto++;
+                        if (docIDUpto > values.docID()) {
+                            try {
+                                values.nextDoc();
+                            } catch (IOException ioe) {
+                                throw new RuntimeException(ioe);
+                            }
+                        }
+                        Number result;
+                        if (docIDUpto == values.docID()) {
+                            try {
+                                result = values.longValue();
+                            } catch (IOException ioe) {
+                                throw new RuntimeException(ioe);
+                            }
+                        } else {
+                            result = null;
+                        }
+                        return result;
+                    }
+                };
+            }
+        };
+    }
+}

+ 42 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyNumericDocValues.java

@@ -0,0 +1,42 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.NumericDocValues;
+
+/**
+ * A per-document numeric value.
+ * <p>
+ * Unlike {@link NumericDocValues}, values are addressed by random access via
+ * {@link #get(int)} rather than through a forward-only iterator.
+ *
+ * @deprecated Use {@link NumericDocValues} instead.
+ */
+@Deprecated
+public abstract class LegacyNumericDocValues {
+
+    /** Sole constructor. (For invocation by subclass
+     *  constructors, typically implicit.) */
+    protected LegacyNumericDocValues() {}
+
+    /**
+     * Returns the numeric value for the specified document ID.
+     * @param docID document ID to lookup
+     * @return numeric value
+     */
+    public abstract long get(int docID);
+}

+ 99 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacyNumericDocValuesWrapper.java

@@ -0,0 +1,99 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+
+/**
+ * Wraps a {@link LegacyNumericDocValues} into a {@link NumericDocValues}.
+ * <p>
+ * Adapts the legacy random-access API to the iterator-style API: documents are
+ * considered to "have" a value when the stored value is non-zero or when the
+ * {@code docsWithField} bits mark the document as present (this distinguishes
+ * an explicit value of 0 from a missing value).
+ *
+ * @deprecated Implement {@link NumericDocValues} directly.
+ */
+@Deprecated
+public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
+    private final Bits docsWithField;
+    private final LegacyNumericDocValues values;
+    private final int maxDoc;
+    private int docID = -1;
+    // Value of the current document, cached by nextDoc()/advanceExact().
+    private long value;
+
+    public LegacyNumericDocValuesWrapper(Bits docsWithField, LegacyNumericDocValues values) {
+        this.docsWithField = docsWithField;
+        this.values = values;
+        this.maxDoc = docsWithField.length();
+    }
+
+    @Override
+    public int docID() {
+        return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+        docID++;
+        while (docID < maxDoc) {
+            value = values.get(docID);
+            // Non-zero value implies presence; zero requires checking the presence bits.
+            if (value != 0 || docsWithField.get(docID)) {
+                return docID;
+            }
+            docID++;
+        }
+        docID = NO_MORE_DOCS;
+        return NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) {
+        assert target >= docID : "target=" + target + " docID=" + docID;
+        if (target == NO_MORE_DOCS) {
+            this.docID = NO_MORE_DOCS;
+        } else {
+            // Position just before the target so nextDoc() lands on the first
+            // document >= target that has a value.
+            this.docID = target - 1;
+            nextDoc();
+        }
+        return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+        docID = target;
+        value = values.get(docID);
+        return value != 0 || docsWithField.get(docID);
+    }
+
+    @Override
+    public long cost() {
+        // TODO
+        return 0;
+    }
+
+    @Override
+    public long longValue() {
+        return value;
+    }
+
+    @Override
+    public String toString() {
+        return "LegacyNumericDocValuesWrapper(" + values + ")";
+    }
+}

+ 114 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedDocValues.java

@@ -0,0 +1,114 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A per-document byte[] with presorted values.
+ * <p>
+ * Per-Document values in a SortedDocValues are deduplicated, dereferenced,
+ * and sorted into a dictionary of unique values. A pointer to the
+ * dictionary value (ordinal) can be retrieved for each document. Ordinals
+ * are dense and in increasing sorted order.
+ *
+ * @deprecated Use {@link SortedDocValues} instead.
+ */
+@Deprecated
+public abstract class LegacySortedDocValues extends LegacyBinaryDocValues {
+
+    /** Sole constructor. (For invocation by subclass
+     *  constructors, typically implicit.) */
+    protected LegacySortedDocValues() {}
+
+    /**
+     * Returns the ordinal for the specified docID.
+     * @param  docID document ID to lookup
+     * @return ordinal for the document: this is dense, starts at 0, then
+     *         increments by 1 for the next value in sorted order. Note that
+     *         missing values are indicated by -1.
+     */
+    public abstract int getOrd(int docID);
+
+    /** Retrieves the value for the specified ordinal. The returned
+     * {@link BytesRef} may be re-used across calls to {@link #lookupOrd(int)}
+     * so make sure to {@link BytesRef#deepCopyOf(BytesRef) copy it} if you want
+     * to keep it around.
+     * @param ord ordinal to lookup (must be &gt;= 0 and &lt; {@link #getValueCount()})
+     * @see #getOrd(int)
+     */
+    public abstract BytesRef lookupOrd(int ord);
+
+    /**
+     * Returns the number of unique values.
+     * @return number of unique values in this SortedDocValues. This is
+     *         also equivalent to one plus the maximum ordinal.
+     */
+    public abstract int getValueCount();
+
+    // Shared sentinel returned by get() for docs without a value; callers that
+    // need to distinguish "missing" from an empty value must use getOrd().
+    private final BytesRef empty = new BytesRef();
+
+    @Override
+    public BytesRef get(int docID) {
+        int ord = getOrd(docID);
+        if (ord == -1) {
+            return empty;
+        } else {
+            return lookupOrd(ord);
+        }
+    }
+
+    /** If {@code key} exists, returns its ordinal, else
+     *  returns {@code -insertionPoint-1}, like {@code
+     *  Arrays.binarySearch}.
+     *
+     *  @param key Key to look up
+     **/
+    public int lookupTerm(BytesRef key) {
+        // Classic binary search over the sorted value dictionary.
+        int low = 0;
+        int high = getValueCount() - 1;
+
+        while (low <= high) {
+            // Unsigned shift avoids overflow for large (low + high).
+            int mid = (low + high) >>> 1;
+            final BytesRef term = lookupOrd(mid);
+            int cmp = term.compareTo(key);
+
+            if (cmp < 0) {
+                low = mid + 1;
+            } else if (cmp > 0) {
+                high = mid - 1;
+            } else {
+                return mid; // key found
+            }
+        }
+
+        return -(low + 1);  // key not found.
+    }
+
+    /**
+     * Returns a {@link TermsEnum} over the values.
+     * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
+     */
+    public TermsEnum termsEnum() {
+        throw new UnsupportedOperationException();
+    }
+}

+ 104 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedDocValuesWrapper.java

@@ -0,0 +1,104 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Wraps a {@link LegacySortedDocValues} into a {@link SortedDocValues}.
+ * <p>
+ * Adapts the legacy random-access API to the iterator-style API; documents
+ * whose legacy ordinal is -1 (missing value) are skipped by iteration.
+ *
+ * @deprecated Implement {@link SortedDocValues} directly.
+ */
+@Deprecated
+public final class LegacySortedDocValuesWrapper extends SortedDocValues {
+    private final LegacySortedDocValues values;
+    private final int maxDoc;
+    private int docID = -1;
+    // Ordinal of the current document, cached by nextDoc()/advanceExact().
+    private int ord;
+
+    public LegacySortedDocValuesWrapper(LegacySortedDocValues values, int maxDoc) {
+        this.values = values;
+        this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public int docID() {
+        return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+        assert docID != NO_MORE_DOCS;
+        docID++;
+        while (docID < maxDoc) {
+            ord = values.getOrd(docID);
+            // -1 means the document has no value; keep scanning.
+            if (ord != -1) {
+                return docID;
+            }
+            docID++;
+        }
+        docID = NO_MORE_DOCS;
+        return NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) {
+        if (target < docID) {
+            throw new IllegalArgumentException("cannot advance backwards: docID=" + docID + " target=" + target);
+        }
+        if (target >= maxDoc) {
+            this.docID = NO_MORE_DOCS;
+        } else {
+            // Position just before the target so nextDoc() lands on the first
+            // document >= target that has a value.
+            this.docID = target - 1;
+            nextDoc();
+        }
+        return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+        docID = target;
+        ord = values.getOrd(docID);
+        return ord != -1;
+    }
+
+    @Override
+    public long cost() {
+        return 0;
+    }
+
+    @Override
+    public int ordValue() {
+        return ord;
+    }
+
+    @Override
+    public BytesRef lookupOrd(int ord) {
+        return values.lookupOrd(ord);
+    }
+
+    @Override
+    public int getValueCount() {
+        return values.getValueCount();
+    }
+}

+ 53 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedNumericDocValues.java

@@ -0,0 +1,53 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.SortedNumericDocValues;
+
+/**
+ * A list of per-document numeric values, sorted
+ * according to {@link Long#compare(long, long)}.
+ * <p>
+ * Usage: position to a document with {@link #setDocument(int)}, then read
+ * {@link #count()} values via {@link #valueAt(int)}.
+ *
+ * @deprecated Use {@link SortedNumericDocValues} instead.
+ */
+@Deprecated
+public abstract class LegacySortedNumericDocValues {
+
+    /** Sole constructor. (For invocation by subclass
+     *  constructors, typically implicit.) */
+    protected LegacySortedNumericDocValues() {}
+
+    /**
+     * Positions to the specified document
+     */
+    public abstract void setDocument(int doc);
+
+    /**
+     * Retrieve the value for the current document at the specified index.
+     * An index ranges from {@code 0} to {@code count()-1}.
+     */
+    public abstract long valueAt(int index);
+
+    /**
+     * Retrieves the count of values for the current document.
+     * This may be zero if a document has no values.
+     */
+    public abstract int count();
+}

+ 102 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedNumericDocValuesWrapper.java

@@ -0,0 +1,102 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.SortedNumericDocValues;
+
+import java.io.IOException;
+
+/**
+ * Wraps a {@link LegacySortedNumericDocValues} into a {@link SortedNumericDocValues}.
+ * <p>
+ * Adapts the legacy positional API (setDocument/valueAt/count) to the
+ * iterator-style API; documents with zero values are skipped by iteration.
+ *
+ * @deprecated Implement {@link SortedNumericDocValues} directly.
+ */
+@Deprecated
+public final class LegacySortedNumericDocValuesWrapper extends SortedNumericDocValues {
+    private final LegacySortedNumericDocValues values;
+    private final int maxDoc;
+    private int docID = -1;
+    // Index of the next value to return from nextValue() for the current document.
+    private int upto;
+
+    public LegacySortedNumericDocValuesWrapper(LegacySortedNumericDocValues values, int maxDoc) {
+        this.values = values;
+        this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public int docID() {
+        return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+        assert docID != NO_MORE_DOCS;
+        while (true) {
+            docID++;
+            if (docID == maxDoc) {
+                docID = NO_MORE_DOCS;
+                break;
+            }
+            // Legacy API requires positioning before count() can be read.
+            values.setDocument(docID);
+            if (values.count() != 0) {
+                break;
+            }
+        }
+        // Reset the per-document value cursor.
+        upto = 0;
+        return docID;
+    }
+
+    @Override
+    public int advance(int target) {
+        if (target < docID) {
+            throw new IllegalArgumentException("cannot advance backwards: docID=" + docID + " target=" + target);
+        }
+        if (target >= maxDoc) {
+            docID = NO_MORE_DOCS;
+        } else {
+            // Position just before the target so nextDoc() lands on the first
+            // document >= target that has values.
+            docID = target - 1;
+            nextDoc();
+        }
+        return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+        docID = target;
+        values.setDocument(docID);
+        upto = 0;
+        return values.count() != 0;
+    }
+
+    @Override
+    public long cost() {
+        return 0;
+    }
+
+    @Override
+    public long nextValue() {
+        return values.valueAt(upto++);
+    }
+
+    @Override
+    public int docValueCount() {
+        return values.count();
+    }
+}

+ 115 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedSetDocValues.java

@@ -0,0 +1,115 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * A per-document set of presorted byte[] values.
+ * <p>
+ * Per-Document values in a SortedDocValues are deduplicated, dereferenced,
+ * and sorted into a dictionary of unique values. A pointer to the
+ * dictionary value (ordinal) can be retrieved for each document. Ordinals
+ * are dense and in increasing sorted order.
+ * <p>
+ * Usage: position to a document with {@link #setDocument(int)}, then consume
+ * ordinals via {@link #nextOrd()} until {@link #NO_MORE_ORDS} is returned.
+ *
+ * @deprecated Use {@link SortedSetDocValues} instead.
+ */
+@Deprecated
+public abstract class LegacySortedSetDocValues {
+
+    /** Sole constructor. (For invocation by subclass
+     * constructors, typically implicit.) */
+    protected LegacySortedSetDocValues() {}
+
+    /** When returned by {@link #nextOrd()} it means there are no more
+     *  ordinals for the document.
+     */
+    public static final long NO_MORE_ORDS = -1;
+
+    /**
+     * Returns the next ordinal for the current document (previously
+     * set by {@link #setDocument(int)}.
+     * @return next ordinal for the document, or {@link #NO_MORE_ORDS}.
+     *         ordinals are dense, start at 0, then increment by 1 for
+     *         the next value in sorted order.
+     */
+    public abstract long nextOrd();
+
+    /**
+     * Sets iteration to the specified docID
+     * @param docID document ID
+     */
+    public abstract void setDocument(int docID);
+
+    /** Retrieves the value for the specified ordinal. The returned
+     * {@link BytesRef} may be re-used across calls to lookupOrd so make sure to
+     * {@link BytesRef#deepCopyOf(BytesRef) copy it} if you want to keep it
+     * around.
+     * @param ord ordinal to lookup
+     * @see #nextOrd
+     */
+    public abstract BytesRef lookupOrd(long ord);
+
+    /**
+     * Returns the number of unique values.
+     * @return number of unique values in this SortedDocValues. This is
+     *         also equivalent to one plus the maximum ordinal.
+     */
+    public abstract long getValueCount();
+
+    /** If {@code key} exists, returns its ordinal, else
+     *  returns {@code -insertionPoint-1}, like {@code
+     *  Arrays.binarySearch}.
+     *
+     *  @param key Key to look up
+     **/
+    public long lookupTerm(BytesRef key) {
+        // Classic binary search over the sorted value dictionary, with long
+        // arithmetic since ordinals may exceed the int range.
+        long low = 0;
+        long high = getValueCount() - 1;
+
+        while (low <= high) {
+            // Unsigned shift avoids overflow for large (low + high).
+            long mid = (low + high) >>> 1;
+            final BytesRef term = lookupOrd(mid);
+            int cmp = term.compareTo(key);
+
+            if (cmp < 0) {
+                low = mid + 1;
+            } else if (cmp > 0) {
+                high = mid - 1;
+            } else {
+                return mid; // key found
+            }
+        }
+
+        return -(low + 1);  // key not found.
+    }
+
+    /**
+     * Returns a {@link TermsEnum} over the values.
+     * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
+     */
+    public TermsEnum termsEnum() throws IOException {
+        throw new UnsupportedOperationException();
+    }
+}

+ 115 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/index/LegacySortedSetDocValuesWrapper.java

@@ -0,0 +1,115 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.index;
+
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Wraps a {@link LegacySortedSetDocValues} into a {@link SortedSetDocValues}.
+ * <p>
+ * Adapts the legacy positional API (setDocument/nextOrd) to the iterator-style
+ * API. Iteration buffers one ordinal ahead: nextDoc()/advanceExact() fetch the
+ * first ordinal to decide whether the document has values, and nextOrd() hands
+ * out the buffered ordinal before fetching the next one.
+ *
+ * @deprecated Implement {@link SortedSetDocValues} directly.
+ */
+@Deprecated
+public final class LegacySortedSetDocValuesWrapper extends SortedSetDocValues {
+    private final LegacySortedSetDocValues values;
+    private final int maxDoc;
+    private int docID = -1;
+    // Look-ahead buffer: next ordinal to return from nextOrd(), or NO_MORE_ORDS.
+    private long ord;
+
+    public LegacySortedSetDocValuesWrapper(LegacySortedSetDocValues values, int maxDoc) {
+        this.values = values;
+        this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public int docID() {
+        return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+        assert docID != NO_MORE_DOCS;
+        docID++;
+        while (docID < maxDoc) {
+            values.setDocument(docID);
+            ord = values.nextOrd();
+            // A document with at least one ordinal is a match; otherwise keep scanning.
+            if (ord != NO_MORE_ORDS) {
+                return docID;
+            }
+            docID++;
+        }
+        docID = NO_MORE_DOCS;
+        return NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) {
+        if (target < docID) {
+            throw new IllegalArgumentException("cannot advance backwards: docID=" + docID + " target=" + target);
+        }
+        if (target >= maxDoc) {
+            this.docID = NO_MORE_DOCS;
+        } else {
+            // Position just before the target so nextDoc() lands on the first
+            // document >= target that has values.
+            this.docID = target - 1;
+            nextDoc();
+        }
+        return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+        docID = target;
+        values.setDocument(docID);
+        ord = values.nextOrd();
+        return ord != NO_MORE_ORDS;
+    }
+
+    @Override
+    public long cost() {
+        return 0;
+    }
+
+    @Override
+    public long nextOrd() {
+        // Return the buffered ordinal and pre-fetch the next one.
+        long result = ord;
+        if (result != NO_MORE_ORDS) {
+            ord = values.nextOrd();
+        }
+        return result;
+    }
+
+    @Override
+    public BytesRef lookupOrd(long ord) {
+        // NOTE(review): narrowing cast to int — presumably legacy formats never
+        // produce ordinals beyond the int range; confirm against the producers.
+        return values.lookupOrd((int) ord);
+    }
+
+    @Override
+    public long getValueCount() {
+        return values.getValueCount();
+    }
+
+    @Override
+    public String toString() {
+        return "LegacySortedSetDocValuesWrapper(" + values + ")";
+    }
+}

+ 72 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/LegacyStringHelper.java

@@ -0,0 +1,72 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.lucene54;
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Legacy methods for manipulating strings.
+ *
+ * @lucene.internal
+ * @deprecated This is only used for backwards compatibility codecs (they
+ * don't work with the Java9-based replacement methods).
+ */
+@Deprecated
+abstract class LegacyStringHelper {
+
+    /**
+     * Compares two {@link BytesRef}, element by element, and returns the
+     * number of elements common to both arrays (from the start of each).
+     *
+     * @param left The first {@link BytesRef} to compare
+     * @param right The second {@link BytesRef} to compare
+     * @return The number of common elements (from the start of each).
+     */
+    public static int bytesDifference(BytesRef left, BytesRef right) {
+        // Only the overlapping prefix of the two refs can be common.
+        int len = left.length < right.length ? left.length : right.length;
+        final byte[] bytesLeft = left.bytes;
+        final int offLeft = left.offset;
+        byte[] bytesRight = right.bytes;
+        final int offRight = right.offset;
+        for (int i = 0; i < len; i++)
+            if (bytesLeft[i + offLeft] != bytesRight[i + offRight]) return i;
+        // One ref is a prefix of the other (or they are equal).
+        return len;
+    }
+
+    /**
+     * Returns the length of {@code currentTerm} needed for use as a sort key.
+     * so that {@link BytesRef#compareTo(BytesRef)} still returns the same result.
+     * This method assumes currentTerm comes after priorTerm.
+     */
+    public static int sortKeyLength(final BytesRef priorTerm, final BytesRef currentTerm) {
+        final int currentTermOffset = currentTerm.offset;
+        final int priorTermOffset = priorTerm.offset;
+        final int limit = Math.min(priorTerm.length, currentTerm.length);
+        for (int i = 0; i < limit; i++) {
+            // Keep bytes through the first position where the terms differ.
+            if (priorTerm.bytes[priorTermOffset + i] != currentTerm.bytes[currentTermOffset + i]) {
+                return i + 1;
+            }
+        }
+        // priorTerm is a prefix of currentTerm: one extra byte disambiguates,
+        // capped by currentTerm's own length.
+        return Math.min(1 + priorTerm.length, currentTerm.length);
+    }
+
+    // Utility class: no instances.
+    private LegacyStringHelper() {}
+
+}

+ 842 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesConsumer.java

@@ -0,0 +1,842 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.lucene54;
+
+import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicWriter;
+import org.apache.lucene.backward_codecs.packed.LegacyDirectWriter;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.ByteBuffersDataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.MathUtil;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
+import org.apache.lucene.util.packed.PackedInts;
+import org.elasticsearch.core.internal.io.IOUtils;
+import org.elasticsearch.xpack.lucene.bwc.codecs.index.LegacyDocValuesIterables;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.stream.StreamSupport;
+
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.ALL_LIVE;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.ALL_MISSING;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.BINARY_FIXED_UNCOMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.BINARY_PREFIX_COMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.BINARY_VARIABLE_UNCOMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.CONST_COMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.DELTA_COMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.GCD_COMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.INTERVAL_COUNT;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.INTERVAL_MASK;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.INTERVAL_SHIFT;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.MONOTONIC_BLOCK_SIZE;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.MONOTONIC_COMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.REVERSE_INTERVAL_COUNT;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.REVERSE_INTERVAL_MASK;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.SORTED_SET_TABLE;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.SORTED_SINGLE_VALUED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.SORTED_WITH_ADDRESSES;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.SPARSE_COMPRESSED;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat.TABLE_COMPRESSED;
+
+/** writer for {@link Lucene54DocValuesFormat} */
+final class Lucene54DocValuesConsumer extends DocValuesConsumer implements Closeable {
+
+    enum NumberType {
+        /** Dense ordinals */
+        ORDINAL,
+        /** Random long values */
+        VALUE;
+    }
+
+    IndexOutput data, meta;
+    final int maxDoc;
+
+    /** expert: Creates a new writer */
+    Lucene54DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension)
+        throws IOException {
+        boolean success = false;
+        try {
+            String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+            data = EndiannessReverserUtil.createOutput(state.directory, dataName, state.context);
+            CodecUtil.writeIndexHeader(
+                data,
+                dataCodec,
+                Lucene54DocValuesFormat.VERSION_CURRENT,
+                state.segmentInfo.getId(),
+                state.segmentSuffix
+            );
+            String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+            meta = EndiannessReverserUtil.createOutput(state.directory, metaName, state.context);
+            CodecUtil.writeIndexHeader(
+                meta,
+                metaCodec,
+                Lucene54DocValuesFormat.VERSION_CURRENT,
+                state.segmentInfo.getId(),
+                state.segmentSuffix
+            );
+            maxDoc = state.segmentInfo.maxDoc();
+            success = true;
+        } finally {
+            if (success == false) {
+                IOUtils.closeWhileHandlingException(this);
+            }
+        }
+    }
+
+    @Override
+    public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+        addNumericField(field, LegacyDocValuesIterables.numericIterable(field, valuesProducer, maxDoc), NumberType.VALUE);
+    }
+
+    void addNumericField(FieldInfo field, Iterable<Number> values, NumberType numberType) throws IOException {
+        long count = 0;
+        long minValue = Long.MAX_VALUE;
+        long maxValue = Long.MIN_VALUE;
+        long gcd = 0;
+        long missingCount = 0;
+        long zeroCount = 0;
+        // TODO: more efficient?
+        HashSet<Long> uniqueValues = null;
+        long missingOrdCount = 0;
+        if (numberType == NumberType.VALUE) {
+            uniqueValues = new HashSet<>();
+
+            for (Number nv : values) {
+                final long v;
+                if (nv == null) {
+                    v = 0;
+                    missingCount++;
+                    zeroCount++;
+                } else {
+                    v = nv.longValue();
+                    if (v == 0) {
+                        zeroCount++;
+                    }
+                }
+
+                if (gcd != 1) {
+                    if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
+                        // in that case v - minValue might overflow and make the GCD computation return
+                        // wrong results. Since these extreme values are unlikely, we just discard
+                        // GCD computation for them
+                        gcd = 1;
+                    } else if (count != 0) { // minValue needs to be set first
+                        gcd = MathUtil.gcd(gcd, v - minValue);
+                    }
+                }
+
+                minValue = Math.min(minValue, v);
+                maxValue = Math.max(maxValue, v);
+
+                if (uniqueValues != null) {
+                    if (uniqueValues.add(v)) {
+                        if (uniqueValues.size() > 256) {
+                            uniqueValues = null;
+                        }
+                    }
+                }
+
+                ++count;
+            }
+        } else {
+            for (Number nv : values) {
+                long v = nv.longValue();
+                if (v == -1L) {
+                    missingOrdCount++;
+                }
+                minValue = Math.min(minValue, v);
+                maxValue = Math.max(maxValue, v);
+                ++count;
+            }
+        }
+
+        final long delta = maxValue - minValue;
+        final int deltaBitsRequired = LegacyDirectWriter.unsignedBitsRequired(delta);
+        final int tableBitsRequired = uniqueValues == null ? Integer.MAX_VALUE : LegacyDirectWriter.bitsRequired(uniqueValues.size() - 1);
+
+        final boolean sparse; // 1% of docs or less have a value
+        switch (numberType) {
+            case VALUE:
+                sparse = (double) missingCount / count >= 0.99;
+                break;
+            case ORDINAL:
+                sparse = (double) missingOrdCount / count >= 0.99;
+                break;
+            default:
+                throw new AssertionError();
+        }
+
+        final int format;
+        if (uniqueValues != null
+            && count <= Integer.MAX_VALUE
+            && (uniqueValues.size() == 1 || (uniqueValues.size() == 2 && missingCount > 0 && zeroCount == missingCount))) {
+            // either one unique value C or two unique values: "missing" and C
+            format = CONST_COMPRESSED;
+        } else if (sparse && count >= 1024) {
+            // require at least 1024 docs to avoid flipping back and forth when doing NRT search
+            format = SPARSE_COMPRESSED;
+        } else if (uniqueValues != null && tableBitsRequired < deltaBitsRequired) {
+            format = TABLE_COMPRESSED;
+        } else if (gcd != 0 && gcd != 1) {
+            final long gcdDelta = (maxValue - minValue) / gcd;
+            final long gcdBitsRequired = LegacyDirectWriter.unsignedBitsRequired(gcdDelta);
+            format = gcdBitsRequired < deltaBitsRequired ? GCD_COMPRESSED : DELTA_COMPRESSED;
+        } else {
+            format = DELTA_COMPRESSED;
+        }
+        meta.writeVInt(field.number);
+        meta.writeByte(Lucene54DocValuesFormat.NUMERIC);
+        meta.writeVInt(format);
+        if (format == SPARSE_COMPRESSED) {
+            meta.writeLong(data.getFilePointer());
+            final long numDocsWithValue;
+            switch (numberType) {
+                case VALUE:
+                    numDocsWithValue = count - missingCount;
+                    break;
+                case ORDINAL:
+                    numDocsWithValue = count - missingOrdCount;
+                    break;
+                default:
+                    throw new AssertionError();
+            }
+            final long maxDoc = writeSparseMissingBitset(values, numberType, numDocsWithValue);
+            assert maxDoc == count;
+        } else if (missingCount == 0) {
+            meta.writeLong(ALL_LIVE);
+        } else if (missingCount == count) {
+            meta.writeLong(ALL_MISSING);
+        } else {
+            meta.writeLong(data.getFilePointer());
+            writeMissingBitset(values);
+        }
+        meta.writeLong(data.getFilePointer());
+        meta.writeVLong(count);
+
+        switch (format) {
+            case CONST_COMPRESSED:
+                // write the constant (nonzero value in the n=2 case, singleton value otherwise)
+                meta.writeLong(minValue < 0 ? Collections.min(uniqueValues) : Collections.max(uniqueValues));
+                break;
+            case GCD_COMPRESSED:
+                meta.writeLong(minValue);
+                meta.writeLong(gcd);
+                final long maxDelta = (maxValue - minValue) / gcd;
+                final int bits = LegacyDirectWriter.unsignedBitsRequired(maxDelta);
+                meta.writeVInt(bits);
+                final LegacyDirectWriter quotientWriter = LegacyDirectWriter.getInstance(data, count, bits);
+                for (Number nv : values) {
+                    long value = nv == null ? 0 : nv.longValue();
+                    quotientWriter.add((value - minValue) / gcd);
+                }
+                quotientWriter.finish();
+                break;
+            case DELTA_COMPRESSED:
+                final long minDelta = delta < 0 ? 0 : minValue;
+                meta.writeLong(minDelta);
+                meta.writeVInt(deltaBitsRequired);
+                final LegacyDirectWriter writer = LegacyDirectWriter.getInstance(data, count, deltaBitsRequired);
+                for (Number nv : values) {
+                    long v = nv == null ? 0 : nv.longValue();
+                    writer.add(v - minDelta);
+                }
+                writer.finish();
+                break;
+            case TABLE_COMPRESSED:
+                final Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
+                Arrays.sort(decode);
+                final HashMap<Long, Integer> encode = new HashMap<>();
+                meta.writeVInt(decode.length);
+                for (int i = 0; i < decode.length; i++) {
+                    meta.writeLong(decode[i]);
+                    encode.put(decode[i], i);
+                }
+                meta.writeVInt(tableBitsRequired);
+                final LegacyDirectWriter ordsWriter = LegacyDirectWriter.getInstance(data, count, tableBitsRequired);
+                for (Number nv : values) {
+                    ordsWriter.add(encode.get(nv == null ? 0 : nv.longValue()));
+                }
+                ordsWriter.finish();
+                break;
+            case SPARSE_COMPRESSED:
+                final Iterable<Number> filteredMissingValues;
+                switch (numberType) {
+                    case VALUE:
+                        meta.writeByte((byte) 0);
+                        filteredMissingValues = new Iterable<Number>() {
+                            @Override
+                            public Iterator<Number> iterator() {
+                                return StreamSupport.stream(values.spliterator(), false).filter(value -> value != null).iterator();
+                            }
+                        };
+                        break;
+                    case ORDINAL:
+                        meta.writeByte((byte) 1);
+                        filteredMissingValues = new Iterable<Number>() {
+                            @Override
+                            public Iterator<Number> iterator() {
+                                return StreamSupport.stream(values.spliterator(), false)
+                                    .filter(value -> value.longValue() != -1L)
+                                    .iterator();
+                            }
+                        };
+                        break;
+                    default:
+                        throw new AssertionError();
+                }
+                // Write non-missing values as a numeric field
+                addNumericField(field, filteredMissingValues, numberType);
+                break;
+            default:
+                throw new AssertionError();
+        }
+        meta.writeLong(data.getFilePointer());
+    }
+
+    // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
+    // but this is very simple, and algorithms only check this for values of 0 anyway (doesn't slow down normal decode)
+    void writeMissingBitset(Iterable<?> values) throws IOException {
+        byte bits = 0;
+        int count = 0;
+        for (Object v : values) {
+            if (count == 8) {
+                data.writeByte(bits);
+                count = 0;
+                bits = 0;
+            }
+            if (v != null) {
+                bits |= 1 << (count & 7);
+            }
+            count++;
+        }
+        if (count > 0) {
+            data.writeByte(bits);
+        }
+    }
+
+    long writeSparseMissingBitset(Iterable<Number> values, NumberType numberType, long numDocsWithValue) throws IOException {
+        meta.writeVLong(numDocsWithValue);
+
+        // Write doc IDs that have a value
+        meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
+        final LegacyDirectMonotonicWriter docIdsWriter = LegacyDirectMonotonicWriter.getInstance(
+            meta,
+            data,
+            numDocsWithValue,
+            DIRECT_MONOTONIC_BLOCK_SHIFT
+        );
+        long docID = 0;
+        for (Number nv : values) {
+            switch (numberType) {
+                case VALUE:
+                    if (nv != null) {
+                        docIdsWriter.add(docID);
+                    }
+                    break;
+                case ORDINAL:
+                    if (nv.longValue() != -1L) {
+                        docIdsWriter.add(docID);
+                    }
+                    break;
+                default:
+                    throw new AssertionError();
+            }
+            docID++;
+        }
+        docIdsWriter.finish();
+        return docID;
+    }
+
+    @Override
+    public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+        addBinaryField(field, LegacyDocValuesIterables.binaryIterable(field, valuesProducer, maxDoc));
+    }
+
+    private void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+        // write the byte[] data
+        meta.writeVInt(field.number);
+        meta.writeByte(Lucene54DocValuesFormat.BINARY);
+        int minLength = Integer.MAX_VALUE;
+        int maxLength = Integer.MIN_VALUE;
+        final long startFP = data.getFilePointer();
+        long count = 0;
+        long missingCount = 0;
+        for (BytesRef v : values) {
+            final int length;
+            if (v == null) {
+                length = 0;
+                missingCount++;
+            } else {
+                length = v.length;
+            }
+            minLength = Math.min(minLength, length);
+            maxLength = Math.max(maxLength, length);
+            if (v != null) {
+                data.writeBytes(v.bytes, v.offset, v.length);
+            }
+            count++;
+        }
+        meta.writeVInt(minLength == maxLength ? BINARY_FIXED_UNCOMPRESSED : BINARY_VARIABLE_UNCOMPRESSED);
+        if (missingCount == 0) {
+            meta.writeLong(ALL_LIVE);
+        } else if (missingCount == count) {
+            meta.writeLong(ALL_MISSING);
+        } else {
+            meta.writeLong(data.getFilePointer());
+            writeMissingBitset(values);
+        }
+        meta.writeVInt(minLength);
+        meta.writeVInt(maxLength);
+        meta.writeVLong(count);
+        meta.writeLong(startFP);
+
+        // if minLength == maxLength, it's a fixed-length byte[], we are done (the addresses are implicit)
+        // otherwise, we need to record the length fields...
+        if (minLength != maxLength) {
+            meta.writeLong(data.getFilePointer());
+            meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
+
+            final LegacyDirectMonotonicWriter writer = LegacyDirectMonotonicWriter.getInstance(
+                meta,
+                data,
+                count + 1,
+                DIRECT_MONOTONIC_BLOCK_SHIFT
+            );
+            long addr = 0;
+            writer.add(addr);
+            for (BytesRef v : values) {
+                if (v != null) {
+                    addr += v.length;
+                }
+                writer.add(addr);
+            }
+            writer.finish();
+            meta.writeLong(data.getFilePointer());
+        }
+    }
+
+    /** expert: writes a value dictionary for a sorted/sortedset field */
+    private void addTermsDict(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
+        // first check if it's a "fixed-length" terms dict, and compressibility if so
+        int minLength = Integer.MAX_VALUE;
+        int maxLength = Integer.MIN_VALUE;
+        long numValues = 0;
+        BytesRefBuilder previousValue = new BytesRefBuilder();
+        long prefixSum = 0; // only valid for fixed-width data, as we have a choice there
+        for (BytesRef v : values) {
+            minLength = Math.min(minLength, v.length);
+            maxLength = Math.max(maxLength, v.length);
+            if (minLength == maxLength) {
+                int termPosition = (int) (numValues & INTERVAL_MASK);
+                if (termPosition == 0) {
+                    // first term in block, save it away to compare against the last term later
+                    previousValue.copyBytes(v);
+                } else if (termPosition == INTERVAL_COUNT - 1) {
+                    // last term in block, accumulate shared prefix against first term
+                    prefixSum += LegacyStringHelper.bytesDifference(previousValue.get(), v);
+                }
+            }
+            numValues++;
+        }
+        // for fixed width data, look at the avg(shared prefix) before deciding how to encode:
+        // prefix compression "costs" worst case 2 bytes per term because we must store suffix lengths.
+        // so if we share at least 3 bytes on average, always compress.
+        if (minLength == maxLength && prefixSum <= 3 * (numValues >> INTERVAL_SHIFT)) {
+            // no index needed: not very compressible, direct addressing by mult
+            addBinaryField(field, values);
+        } else if (numValues < REVERSE_INTERVAL_COUNT) {
+            // low cardinality: waste a few KB of ram, but can't really use fancy index etc
+            addBinaryField(field, values);
+        } else {
+            assert numValues > 0; // we don't have to handle the empty case
+            // header
+            meta.writeVInt(field.number);
+            meta.writeByte(Lucene54DocValuesFormat.BINARY);
+            meta.writeVInt(BINARY_PREFIX_COMPRESSED);
+            meta.writeLong(-1L);
+            // now write the bytes: sharing prefixes within a block
+            final long startFP = data.getFilePointer();
+            // currently, we have to store the delta from expected for every 1/nth term
+            // we could avoid this, but it's not much and less overall RAM than the previous approach!
+            ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput();
+            MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, MONOTONIC_BLOCK_SIZE);
+            // buffers up 16 terms
+            ByteBuffersDataOutput bytesBuffer = new ByteBuffersDataOutput();
+            // buffers up block header
+            ByteBuffersDataOutput headerBuffer = new ByteBuffersDataOutput();
+            BytesRefBuilder lastTerm = new BytesRefBuilder();
+            lastTerm.grow(maxLength);
+            long count = 0;
+            int suffixDeltas[] = new int[INTERVAL_COUNT];
+            for (BytesRef v : values) {
+                int termPosition = (int) (count & INTERVAL_MASK);
+                if (termPosition == 0) {
+                    termAddresses.add(data.getFilePointer() - startFP);
+                    // abs-encode first term
+                    headerBuffer.writeVInt(v.length);
+                    headerBuffer.writeBytes(v.bytes, v.offset, v.length);
+                    lastTerm.copyBytes(v);
+                } else {
+                    // prefix-code: we only share at most 255 characters, to encode the length as a single
+                    // byte and have random access. Larger terms just get less compression.
+                    int sharedPrefix = Math.min(255, LegacyStringHelper.bytesDifference(lastTerm.get(), v));
+                    bytesBuffer.writeByte((byte) sharedPrefix);
+                    bytesBuffer.writeBytes(v.bytes, v.offset + sharedPrefix, v.length - sharedPrefix);
+                    // we can encode one smaller, because terms are unique.
+                    suffixDeltas[termPosition] = v.length - sharedPrefix - 1;
+                }
+
+                count++;
+                // flush block
+                if ((count & INTERVAL_MASK) == 0) {
+                    flushTermsDictBlock(headerBuffer, bytesBuffer, suffixDeltas);
+                }
+            }
+            // flush any remaining partial block
+            int leftover = (int) (count & INTERVAL_MASK);
+            if (leftover > 0) {
+                Arrays.fill(suffixDeltas, leftover, suffixDeltas.length, 0);
+                flushTermsDictBlock(headerBuffer, bytesBuffer, suffixDeltas);
+            }
+            final long indexStartFP = data.getFilePointer();
+            // write addresses of indexed terms
+            termAddresses.finish();
+            addressBuffer.copyTo(data);
+            addressBuffer = null;
+            termAddresses = null;
+            meta.writeVInt(minLength);
+            meta.writeVInt(maxLength);
+            meta.writeVLong(count);
+            meta.writeLong(startFP);
+            meta.writeLong(indexStartFP);
+            meta.writeVInt(PackedInts.VERSION_MONOTONIC_WITHOUT_ZIGZAG);
+            meta.writeVInt(MONOTONIC_BLOCK_SIZE);
+            addReverseTermIndex(field, values, maxLength);
+        }
+    }
+
+    // writes term dictionary "block"
+    // first term is absolute encoded as vint length + bytes.
+    // lengths of subsequent N terms are encoded as either N bytes or N shorts.
+    // in the double-byte case, the first byte is indicated with -1.
+    // subsequent terms are encoded as byte suffixLength + bytes.
+    private void flushTermsDictBlock(ByteBuffersDataOutput headerBuffer, ByteBuffersDataOutput bytesBuffer, int suffixDeltas[])
+        throws IOException {
+        boolean twoByte = false;
+        for (int i = 1; i < suffixDeltas.length; i++) {
+            if (suffixDeltas[i] > 254) {
+                twoByte = true;
+            }
+        }
+        if (twoByte) {
+            headerBuffer.writeByte((byte) 255);
+            for (int i = 1; i < suffixDeltas.length; i++) {
+                headerBuffer.writeShort((short) suffixDeltas[i]);
+            }
+        } else {
+            for (int i = 1; i < suffixDeltas.length; i++) {
+                headerBuffer.writeByte((byte) suffixDeltas[i]);
+            }
+        }
+        headerBuffer.copyTo(data);
+        headerBuffer.reset();
+        bytesBuffer.copyTo(data);
+        bytesBuffer.reset();
+    }
+
+    // writes reverse term index: used for binary searching a term into a range of 64 blocks
+    // for every 64 blocks (1024 terms) we store a term, trimming any suffix unnecessary for comparison
+    // terms are written as a contiguous byte[], but never spanning 2^15 byte boundaries.
+    private void addReverseTermIndex(FieldInfo field, final Iterable<BytesRef> values, int maxLength) throws IOException {
+        long count = 0;
+        BytesRefBuilder priorTerm = new BytesRefBuilder();
+        priorTerm.grow(maxLength);
+        BytesRef indexTerm = new BytesRef();
+        long startFP = data.getFilePointer();
+        PagedBytes pagedBytes = new PagedBytes(15);
+        MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE);
+
+        for (BytesRef b : values) {
+            int termPosition = (int) (count & REVERSE_INTERVAL_MASK);
+            if (termPosition == 0) {
+                int len = LegacyStringHelper.sortKeyLength(priorTerm.get(), b);
+                indexTerm.bytes = b.bytes;
+                indexTerm.offset = b.offset;
+                indexTerm.length = len;
+                addresses.add(pagedBytes.copyUsingLengthPrefix(indexTerm));
+            } else if (termPosition == REVERSE_INTERVAL_MASK) {
+                priorTerm.copyBytes(b);
+            }
+            count++;
+        }
+        addresses.finish();
+        long numBytes = pagedBytes.getPointer();
+        pagedBytes.freeze(true);
+        PagedBytes.PagedBytesDataInput in = pagedBytes.getDataInput();
+        meta.writeLong(startFP);
+        data.writeVLong(numBytes);
+        data.copyBytes(in, numBytes);
+    }
+
+    @Override
+    public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+        meta.writeVInt(field.number);
+        meta.writeByte(Lucene54DocValuesFormat.SORTED);
+        addTermsDict(field, LegacyDocValuesIterables.valuesIterable(valuesProducer.getSorted(field)));
+        addNumericField(field, LegacyDocValuesIterables.sortedOrdIterable(valuesProducer, field, maxDoc), NumberType.ORDINAL);
+    }
+
+    private void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> ords) throws IOException {
+        meta.writeVInt(field.number);
+        meta.writeByte(Lucene54DocValuesFormat.SORTED);
+        addTermsDict(field, values);
+        addNumericField(field, ords, NumberType.ORDINAL);
+    }
+
+    @Override
+    public void addSortedNumericField(FieldInfo field, final DocValuesProducer valuesProducer) throws IOException {
+
+        final Iterable<Number> docToValueCount = LegacyDocValuesIterables.sortedNumericToDocCount(valuesProducer, field, maxDoc);
+        final Iterable<Number> values = LegacyDocValuesIterables.sortedNumericToValues(valuesProducer, field);
+
+        meta.writeVInt(field.number);
+        meta.writeByte(Lucene54DocValuesFormat.SORTED_NUMERIC);
+        if (isSingleValued(docToValueCount)) {
+            meta.writeVInt(SORTED_SINGLE_VALUED);
+            // The field is single-valued, we can encode it as NUMERIC
+            addNumericField(field, singletonView(docToValueCount, values, null), NumberType.VALUE);
+        } else {
+            final SortedSet<LongsRef> uniqueValueSets = uniqueValueSets(docToValueCount, values);
+            if (uniqueValueSets != null) {
+                meta.writeVInt(SORTED_SET_TABLE);
+
+                // write the set_id -> values mapping
+                writeDictionary(uniqueValueSets);
+
+                // write the doc -> set_id as a numeric field
+                addNumericField(field, docToSetId(uniqueValueSets, docToValueCount, values), NumberType.ORDINAL);
+            } else {
+                meta.writeVInt(SORTED_WITH_ADDRESSES);
+                // write the stream of values as a numeric field
+                addNumericField(field, values, NumberType.VALUE);
+                // write the doc -> ord count as an absolute index to the stream
+                addOrdIndex(field, docToValueCount);
+            }
+        }
+    }
+
+    @Override
+    public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+
+        Iterable<BytesRef> values = LegacyDocValuesIterables.valuesIterable(valuesProducer.getSortedSet(field));
+        Iterable<Number> docToOrdCount = LegacyDocValuesIterables.sortedSetOrdCountIterable(valuesProducer, field, maxDoc);
+        Iterable<Number> ords = LegacyDocValuesIterables.sortedSetOrdsIterable(valuesProducer, field);
+
+        meta.writeVInt(field.number);
+        meta.writeByte(Lucene54DocValuesFormat.SORTED_SET);
+
+        if (isSingleValued(docToOrdCount)) {
+            meta.writeVInt(SORTED_SINGLE_VALUED);
+            // The field is single-valued, we can encode it as SORTED
+            addSortedField(field, values, singletonView(docToOrdCount, ords, -1L));
+        } else {
+            final SortedSet<LongsRef> uniqueValueSets = uniqueValueSets(docToOrdCount, ords);
+            if (uniqueValueSets != null) {
+                meta.writeVInt(SORTED_SET_TABLE);
+
+                // write the set_id -> ords mapping
+                writeDictionary(uniqueValueSets);
+
+                // write the ord -> byte[] as a binary field
+                addTermsDict(field, values);
+
+                // write the doc -> set_id as a numeric field
+                addNumericField(field, docToSetId(uniqueValueSets, docToOrdCount, ords), NumberType.ORDINAL);
+            } else {
+                meta.writeVInt(SORTED_WITH_ADDRESSES);
+
+                // write the ord -> byte[] as a binary field
+                addTermsDict(field, values);
+
+                // write the stream of ords as a numeric field
+                // NOTE: we could return an iterator that delta-encodes these within a doc
+                addNumericField(field, ords, NumberType.ORDINAL);
+
+                // write the doc -> ord count as a absolute index to the stream
+                addOrdIndex(field, docToOrdCount);
+            }
+        }
+    }
+
+    private SortedSet<LongsRef> uniqueValueSets(Iterable<Number> docToValueCount, Iterable<Number> values) {
+        Set<LongsRef> uniqueValueSet = new HashSet<>();
+        LongsRef docValues = new LongsRef(256);
+
+        Iterator<Number> valueCountIterator = docToValueCount.iterator();
+        Iterator<Number> valueIterator = values.iterator();
+        int totalDictSize = 0;
+        while (valueCountIterator.hasNext()) {
+            docValues.length = valueCountIterator.next().intValue();
+            if (docValues.length > 256) {
+                return null;
+            }
+            for (int i = 0; i < docValues.length; ++i) {
+                docValues.longs[i] = valueIterator.next().longValue();
+            }
+            if (uniqueValueSet.contains(docValues)) {
+                continue;
+            }
+            totalDictSize += docValues.length;
+            if (totalDictSize > 256) {
+                return null;
+            }
+            uniqueValueSet.add(new LongsRef(ArrayUtil.copyOfSubArray(docValues.longs, 0, docValues.length), 0, docValues.length));
+        }
+        assert valueIterator.hasNext() == false;
+        return new TreeSet<>(uniqueValueSet);
+    }
+
+    private void writeDictionary(SortedSet<LongsRef> uniqueValueSets) throws IOException {
+        int lengthSum = 0;
+        for (LongsRef longs : uniqueValueSets) {
+            lengthSum += longs.length;
+        }
+
+        meta.writeInt(lengthSum);
+        for (LongsRef valueSet : uniqueValueSets) {
+            for (int i = 0; i < valueSet.length; ++i) {
+                meta.writeLong(valueSet.longs[valueSet.offset + i]);
+            }
+        }
+
+        meta.writeInt(uniqueValueSets.size());
+        for (LongsRef valueSet : uniqueValueSets) {
+            meta.writeInt(valueSet.length);
+        }
+    }
+
+    private Iterable<Number> docToSetId(SortedSet<LongsRef> uniqueValueSets, Iterable<Number> docToValueCount, Iterable<Number> values) {
+        final Map<LongsRef, Integer> setIds = new HashMap<>();
+        int i = 0;
+        for (LongsRef set : uniqueValueSets) {
+            setIds.put(set, i++);
+        }
+        assert i == uniqueValueSets.size();
+
+        return new Iterable<Number>() {
+
+            @Override
+            public Iterator<Number> iterator() {
+                final Iterator<Number> valueCountIterator = docToValueCount.iterator();
+                final Iterator<Number> valueIterator = values.iterator();
+                final LongsRef docValues = new LongsRef(256);
+                return new Iterator<Number>() {
+
+                    @Override
+                    public boolean hasNext() {
+                        return valueCountIterator.hasNext();
+                    }
+
+                    @Override
+                    public Number next() {
+                        docValues.length = valueCountIterator.next().intValue();
+                        for (int i = 0; i < docValues.length; ++i) {
+                            docValues.longs[i] = valueIterator.next().longValue();
+                        }
+                        final Integer id = setIds.get(docValues);
+                        assert id != null;
+                        return id;
+                    }
+
+                };
+
+            }
+        };
+    }
+
+    // writes addressing information as MONOTONIC_COMPRESSED integer
+    private void addOrdIndex(FieldInfo field, Iterable<Number> values) throws IOException {
+        meta.writeVInt(field.number);
+        meta.writeByte(Lucene54DocValuesFormat.NUMERIC);
+        meta.writeVInt(MONOTONIC_COMPRESSED);
+        meta.writeLong(-1L);
+        meta.writeLong(data.getFilePointer());
+        meta.writeVLong(maxDoc);
+        meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
+
+        final LegacyDirectMonotonicWriter writer = LegacyDirectMonotonicWriter.getInstance(
+            meta,
+            data,
+            maxDoc + 1,
+            DIRECT_MONOTONIC_BLOCK_SHIFT
+        );
+        long addr = 0;
+        writer.add(addr);
+        for (Number v : values) {
+            addr += v.longValue();
+            writer.add(addr);
+        }
+        writer.finish();
+        meta.writeLong(data.getFilePointer());
+    }
+
+    @Override
+    public void close() throws IOException {
+        boolean success = false;
+        try {
+            if (meta != null) {
+                meta.writeVInt(-1); // write EOF marker
+                CodecUtil.writeFooter(meta); // write checksum
+            }
+            if (data != null) {
+                CodecUtil.writeFooter(data); // write checksum
+            }
+            success = true;
+        } finally {
+            if (success) {
+                IOUtils.close(data, meta);
+            } else {
+                IOUtils.closeWhileHandlingException(data, meta);
+            }
+            meta = data = null;
+        }
+    }
+}

+ 119 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesFormat.java

@@ -0,0 +1,119 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.lucene54;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+import java.io.IOException;
+
+/**
+ * Lucene 5.4 DocValues format.
+ * @deprecated Only for reading old 6.0+ segments
+ */
+@Deprecated
+public final class Lucene54DocValuesFormat extends DocValuesFormat {
+
+    /** Sole Constructor */
+    public Lucene54DocValuesFormat() {
+        super("Lucene54");
+    }
+
+    @Override
+    public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+        return new Lucene54DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
+    }
+
+    @Override
+    public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
+        return new Lucene54DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
+    }
+
+    static final String DATA_CODEC = "Lucene54DocValuesData";
+    static final String DATA_EXTENSION = "dvd";
+    static final String META_CODEC = "Lucene54DocValuesMetadata";
+    static final String META_EXTENSION = "dvm";
+    static final int VERSION_START = 0;
+    static final int VERSION_CURRENT = VERSION_START;
+
+    // indicates docvalues type
+    static final byte NUMERIC = 0;
+    static final byte BINARY = 1;
+    static final byte SORTED = 2;
+    static final byte SORTED_SET = 3;
+    static final byte SORTED_NUMERIC = 4;
+
+    // address terms in blocks of 16 terms
+    static final int INTERVAL_SHIFT = 4;
+    static final int INTERVAL_COUNT = 1 << INTERVAL_SHIFT;
+    static final int INTERVAL_MASK = INTERVAL_COUNT - 1;
+
+    // build reverse index from every 1024th term
+    static final int REVERSE_INTERVAL_SHIFT = 10;
+    static final int REVERSE_INTERVAL_COUNT = 1 << REVERSE_INTERVAL_SHIFT;
+    static final int REVERSE_INTERVAL_MASK = REVERSE_INTERVAL_COUNT - 1;
+
+    // for conversion from reverse index to block
+    static final int BLOCK_INTERVAL_SHIFT = REVERSE_INTERVAL_SHIFT - INTERVAL_SHIFT;
+    static final int BLOCK_INTERVAL_COUNT = 1 << BLOCK_INTERVAL_SHIFT;
+    static final int BLOCK_INTERVAL_MASK = BLOCK_INTERVAL_COUNT - 1;
+
+    /** Compressed using packed blocks of ints. */
+    static final int DELTA_COMPRESSED = 0;
+    /** Compressed by computing the GCD. */
+    static final int GCD_COMPRESSED = 1;
+    /** Compressed by giving IDs to unique values. */
+    static final int TABLE_COMPRESSED = 2;
+    /** Compressed with monotonically increasing values */
+    static final int MONOTONIC_COMPRESSED = 3;
+    /** Compressed with constant value (uses only missing bitset) */
+    static final int CONST_COMPRESSED = 4;
+    /** Compressed with sparse arrays. */
+    static final int SPARSE_COMPRESSED = 5;
+
+    /** Uncompressed binary, written directly (fixed length). */
+    static final int BINARY_FIXED_UNCOMPRESSED = 0;
+    /** Uncompressed binary, written directly (variable length). */
+    static final int BINARY_VARIABLE_UNCOMPRESSED = 1;
+    /** Compressed binary with shared prefixes */
+    static final int BINARY_PREFIX_COMPRESSED = 2;
+
+    /** Standard storage for sorted set values with 1 level of indirection:
+     *  {@code docId -> address -> ord}. */
+    static final int SORTED_WITH_ADDRESSES = 0;
+    /** Single-valued sorted set values, encoded as sorted values, so no level
+     *  of indirection: {@code docId -> ord}. */
+    static final int SORTED_SINGLE_VALUED = 1;
+    /** Compressed giving IDs to unique sets of values:
+     * {@code docId -> setId -> ords} */
+    static final int SORTED_SET_TABLE = 2;
+
+    /** placeholder for missing offset that means there are no missing values */
+    static final int ALL_LIVE = -1;
+    /** placeholder for missing offset that means all values are missing */
+    static final int ALL_MISSING = -2;
+
+    // addressing uses 16k blocks
+    static final int MONOTONIC_BLOCK_SIZE = 16384;
+    static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
+}

+ 1847 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesProducer.java

@@ -0,0 +1,1847 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2021 Elasticsearch B.V.
+ */
+package org.elasticsearch.xpack.lucene.bwc.codecs.lucene54;
+
+import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader;
+import org.apache.lucene.backward_codecs.packed.LegacyDirectReader;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BaseTermsEnum;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongValues;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
+import org.elasticsearch.core.internal.io.IOUtils;
+import org.elasticsearch.xpack.lucene.bwc.codecs.index.LegacyBinaryDocValues;
+import org.elasticsearch.xpack.lucene.bwc.codecs.index.LegacyBinaryDocValuesWrapper;
+import org.elasticsearch.xpack.lucene.bwc.codecs.index.LegacySortedSetDocValues;
+import org.elasticsearch.xpack.lucene.bwc.codecs.index.LegacySortedSetDocValuesWrapper;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesConsumer.NumberType.ORDINAL;
+import static org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesConsumer.NumberType.VALUE;
+
+/** reader for {@link Lucene54DocValuesFormat} */
+final class Lucene54DocValuesProducer extends DocValuesProducer implements Closeable {
+    private final Map<String, NumericEntry> numerics = new HashMap<>();
+    private final Map<String, BinaryEntry> binaries = new HashMap<>();
+    private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
+    private final Map<String, SortedSetEntry> sortedNumerics = new HashMap<>();
+    private final Map<String, NumericEntry> ords = new HashMap<>();
+    private final Map<String, NumericEntry> ordIndexes = new HashMap<>();
+    private final int numFields;
+    private final AtomicLong ramBytesUsed;
+    private final IndexInput data;
+    private final int maxDoc;
+
+    // memory-resident structures
+    private final Map<String, MonotonicBlockPackedReader> addressInstances = new HashMap<>();
+    private final Map<String, ReverseTermsIndex> reverseIndexInstances = new HashMap<>();
+    private final Map<String, LegacyDirectMonotonicReader.Meta> directAddressesMeta = new HashMap<>();
+
+    private final boolean merging;
+
+    // clone for merge: when merging we don't do any instances.put()s
+    Lucene54DocValuesProducer(Lucene54DocValuesProducer original) {
+        assert Thread.holdsLock(original);
+        numerics.putAll(original.numerics);
+        binaries.putAll(original.binaries);
+        sortedSets.putAll(original.sortedSets);
+        sortedNumerics.putAll(original.sortedNumerics);
+        ords.putAll(original.ords);
+        ordIndexes.putAll(original.ordIndexes);
+        numFields = original.numFields;
+        ramBytesUsed = new AtomicLong(original.ramBytesUsed.get());
+        data = original.data.clone();
+        maxDoc = original.maxDoc;
+
+        addressInstances.putAll(original.addressInstances);
+        reverseIndexInstances.putAll(original.reverseIndexInstances);
+        merging = true;
+    }
+
+    /** expert: instantiates a new reader */
+    Lucene54DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension)
+        throws IOException {
+        String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+        this.maxDoc = state.segmentInfo.maxDoc();
+        merging = false;
+        ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
+
+        int version = -1;
+        int numFields = -1;
+
+        // read in the entries from the metadata file.
+        try (ChecksumIndexInput in = EndiannessReverserUtil.openChecksumInput(state.directory, metaName, state.context)) {
+            Throwable priorE = null;
+            try {
+                version = CodecUtil.checkIndexHeader(
+                    in,
+                    metaCodec,
+                    Lucene54DocValuesFormat.VERSION_START,
+                    Lucene54DocValuesFormat.VERSION_CURRENT,
+                    state.segmentInfo.getId(),
+                    state.segmentSuffix
+                );
+                numFields = readFields(in, state.fieldInfos);
+            } catch (Throwable exception) {
+                priorE = exception;
+            } finally {
+                CodecUtil.checkFooter(in, priorE);
+            }
+        }
+
+        this.numFields = numFields;
+        String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+        this.data = EndiannessReverserUtil.openInput(state.directory, dataName, state.context);
+        boolean success = false;
+        try {
+            final int version2 = CodecUtil.checkIndexHeader(
+                data,
+                dataCodec,
+                Lucene54DocValuesFormat.VERSION_START,
+                Lucene54DocValuesFormat.VERSION_CURRENT,
+                state.segmentInfo.getId(),
+                state.segmentSuffix
+            );
+            if (version != version2) {
+                throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + version2, data);
+            }
+
+            // NOTE: data file is too costly to verify checksum against all the bytes on open,
+            // but for now we at least verify proper structure of the checksum footer: which looks
+            // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+            // such as file truncation.
+            CodecUtil.retrieveChecksum(data);
+
+            success = true;
+        } finally {
+            if (success == false) {
+                IOUtils.closeWhileHandlingException(this.data);
+            }
+        }
+    }
+
+    private void readSortedField(FieldInfo info, IndexInput meta) throws IOException {
+        // sorted = binary + numeric
+        if (meta.readVInt() != info.number) {
+            throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
+        }
+        if (meta.readByte() != Lucene54DocValuesFormat.BINARY) {
+            throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
+        }
+        BinaryEntry b = readBinaryEntry(info, meta);
+        binaries.put(info.name, b);
+
+        if (meta.readVInt() != info.number) {
+            throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
+        }
+        if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+            throw new CorruptIndexException("sorted entry for field: " + info.name + " is corrupt", meta);
+        }
+        NumericEntry n = readNumericEntry(info, meta);
+        ords.put(info.name, n);
+    }
+
+    private void readSortedSetFieldWithAddresses(FieldInfo info, IndexInput meta) throws IOException {
+        // sortedset = binary + numeric (addresses) + ordIndex
+        if (meta.readVInt() != info.number) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        if (meta.readByte() != Lucene54DocValuesFormat.BINARY) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        BinaryEntry b = readBinaryEntry(info, meta);
+        binaries.put(info.name, b);
+
+        if (meta.readVInt() != info.number) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        NumericEntry n1 = readNumericEntry(info, meta);
+        ords.put(info.name, n1);
+
+        if (meta.readVInt() != info.number) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        NumericEntry n2 = readNumericEntry(info, meta);
+        ordIndexes.put(info.name, n2);
+    }
+
+    private void readSortedSetFieldWithTable(FieldInfo info, IndexInput meta) throws IOException {
+        // sortedset table = binary + ordset table + ordset index
+        if (meta.readVInt() != info.number) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        if (meta.readByte() != Lucene54DocValuesFormat.BINARY) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+
+        BinaryEntry b = readBinaryEntry(info, meta);
+        binaries.put(info.name, b);
+
+        if (meta.readVInt() != info.number) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+            throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+        }
+        NumericEntry n = readNumericEntry(info, meta);
+        ords.put(info.name, n);
+    }
+
+    private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
+        int numFields = 0;
+        int fieldNumber = meta.readVInt();
+        while (fieldNumber != -1) {
+            numFields++;
+            FieldInfo info = infos.fieldInfo(fieldNumber);
+            if (info == null) {
+                // trickier to validate more: because we use multiple entries for "composite" types like sortedset, etc.
+                throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
+            }
+            byte type = meta.readByte();
+            if (type == Lucene54DocValuesFormat.NUMERIC) {
+                numerics.put(info.name, readNumericEntry(info, meta));
+            } else if (type == Lucene54DocValuesFormat.BINARY) {
+                BinaryEntry b = readBinaryEntry(info, meta);
+                binaries.put(info.name, b);
+            } else if (type == Lucene54DocValuesFormat.SORTED) {
+                readSortedField(info, meta);
+            } else if (type == Lucene54DocValuesFormat.SORTED_SET) {
+                SortedSetEntry ss = readSortedSetEntry(meta);
+                sortedSets.put(info.name, ss);
+                if (ss.format == Lucene54DocValuesFormat.SORTED_WITH_ADDRESSES) {
+                    readSortedSetFieldWithAddresses(info, meta);
+                } else if (ss.format == Lucene54DocValuesFormat.SORTED_SET_TABLE) {
+                    readSortedSetFieldWithTable(info, meta);
+                } else if (ss.format == Lucene54DocValuesFormat.SORTED_SINGLE_VALUED) {
+                    if (meta.readVInt() != fieldNumber) {
+                        throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    if (meta.readByte() != Lucene54DocValuesFormat.SORTED) {
+                        throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    readSortedField(info, meta);
+                } else {
+                    throw new AssertionError();
+                }
+            } else if (type == Lucene54DocValuesFormat.SORTED_NUMERIC) {
+                SortedSetEntry ss = readSortedSetEntry(meta);
+                sortedNumerics.put(info.name, ss);
+                if (ss.format == Lucene54DocValuesFormat.SORTED_WITH_ADDRESSES) {
+                    if (meta.readVInt() != fieldNumber) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    numerics.put(info.name, readNumericEntry(info, meta));
+                    if (meta.readVInt() != fieldNumber) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    NumericEntry ordIndex = readNumericEntry(info, meta);
+                    ordIndexes.put(info.name, ordIndex);
+                } else if (ss.format == Lucene54DocValuesFormat.SORTED_SET_TABLE) {
+                    if (meta.readVInt() != info.number) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    NumericEntry n = readNumericEntry(info, meta);
+                    ords.put(info.name, n);
+                } else if (ss.format == Lucene54DocValuesFormat.SORTED_SINGLE_VALUED) {
+                    if (meta.readVInt() != fieldNumber) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    if (meta.readByte() != Lucene54DocValuesFormat.NUMERIC) {
+                        throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
+                    }
+                    numerics.put(info.name, readNumericEntry(info, meta));
+                } else {
+                    throw new AssertionError();
+                }
+            } else {
+                throw new CorruptIndexException("invalid type: " + type, meta);
+            }
+            fieldNumber = meta.readVInt();
+        }
+        return numFields;
+    }
+
+    private NumericEntry readNumericEntry(FieldInfo info, IndexInput meta) throws IOException {
+        NumericEntry entry = new NumericEntry();
+        entry.format = meta.readVInt();
+        entry.missingOffset = meta.readLong();
+        if (entry.format == Lucene54DocValuesFormat.SPARSE_COMPRESSED) {
+            // sparse bits need a bit more metadata
+            entry.numDocsWithValue = meta.readVLong();
+            final int blockShift = meta.readVInt();
+            entry.monotonicMeta = LegacyDirectMonotonicReader.loadMeta(meta, entry.numDocsWithValue, blockShift);
+            ramBytesUsed.addAndGet(entry.monotonicMeta.ramBytesUsed());
+            directAddressesMeta.put(info.name, entry.monotonicMeta);
+        }
+        entry.offset = meta.readLong();
+        entry.count = meta.readVLong();
+        switch (entry.format) {
+            case Lucene54DocValuesFormat.CONST_COMPRESSED:
+                entry.minValue = meta.readLong();
+                if (entry.count > Integer.MAX_VALUE) {
+                    // currently just a limitation e.g. of bits interface and so on.
+                    throw new CorruptIndexException("illegal CONST_COMPRESSED count: " + entry.count, meta);
+                }
+                break;
+            case Lucene54DocValuesFormat.GCD_COMPRESSED:
+                entry.minValue = meta.readLong();
+                entry.gcd = meta.readLong();
+                entry.bitsPerValue = meta.readVInt();
+                break;
+            case Lucene54DocValuesFormat.TABLE_COMPRESSED:
+                final int uniqueValues = meta.readVInt();
+                if (uniqueValues > 256) {
+                    throw new CorruptIndexException(
+                        "TABLE_COMPRESSED cannot have more than 256 distinct values, got=" + uniqueValues,
+                        meta
+                    );
+                }
+                entry.table = new long[uniqueValues];
+                for (int i = 0; i < uniqueValues; ++i) {
+                    entry.table[i] = meta.readLong();
+                }
+                ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.table));
+                entry.bitsPerValue = meta.readVInt();
+                break;
+            case Lucene54DocValuesFormat.DELTA_COMPRESSED:
+                entry.minValue = meta.readLong();
+                entry.bitsPerValue = meta.readVInt();
+                break;
+            case Lucene54DocValuesFormat.MONOTONIC_COMPRESSED:
+                final int blockShift = meta.readVInt();
+                entry.monotonicMeta = LegacyDirectMonotonicReader.loadMeta(meta, maxDoc + 1, blockShift);
+                ramBytesUsed.addAndGet(entry.monotonicMeta.ramBytesUsed());
+                directAddressesMeta.put(info.name, entry.monotonicMeta);
+                break;
+            case Lucene54DocValuesFormat.SPARSE_COMPRESSED:
+                final byte numberType = meta.readByte();
+                switch (numberType) {
+                    case 0:
+                        entry.numberType = VALUE;
+                        break;
+                    case 1:
+                        entry.numberType = ORDINAL;
+                        break;
+                    default:
+                        throw new CorruptIndexException("Number type can only be 0 or 1, got=" + numberType, meta);
+                }
+
+                // now read the numeric entry for non-missing values
+                final int fieldNumber = meta.readVInt();
+                if (fieldNumber != info.number) {
+                    throw new CorruptIndexException("Field numbers mistmatch: " + fieldNumber + " != " + info.number, meta);
+                }
+                final int dvFormat = meta.readByte();
+                if (dvFormat != Lucene54DocValuesFormat.NUMERIC) {
+                    throw new CorruptIndexException("Formats mistmatch: " + dvFormat + " != " + Lucene54DocValuesFormat.NUMERIC, meta);
+                }
+                entry.nonMissingValues = readNumericEntry(info, meta);
+                break;
+            default:
+                throw new CorruptIndexException("Unknown format: " + entry.format + ", input=", meta);
+        }
+        entry.endOffset = meta.readLong();
+        return entry;
+    }
+
+    /**
+     * Reads the per-field metadata for a binary doc-values field from the meta
+     * stream. The read order must exactly mirror the order the Lucene 5.4
+     * doc-values writer used, so no reads may be reordered or skipped.
+     */
+    private BinaryEntry readBinaryEntry(FieldInfo info, IndexInput meta) throws IOException {
+        BinaryEntry entry = new BinaryEntry();
+        entry.format = meta.readVInt();
+        entry.missingOffset = meta.readLong();
+        entry.minLength = meta.readVInt();
+        entry.maxLength = meta.readVInt();
+        entry.count = meta.readVLong();
+        entry.offset = meta.readLong();
+        switch (entry.format) {
+            case Lucene54DocValuesFormat.BINARY_FIXED_UNCOMPRESSED:
+                // fixed-length values: offsets are implicit, no extra addressing metadata
+                break;
+            case Lucene54DocValuesFormat.BINARY_PREFIX_COMPRESSED:
+                entry.addressesOffset = meta.readLong();
+                entry.packedIntsVersion = meta.readVInt();
+                entry.blockSize = meta.readVInt();
+                entry.reverseIndexOffset = meta.readLong();
+                break;
+            case Lucene54DocValuesFormat.BINARY_VARIABLE_UNCOMPRESSED:
+                entry.addressesOffset = meta.readLong();
+                final int blockShift = meta.readVInt();
+                // count + 1 addresses: the extra entry marks the end of the last value
+                entry.addressesMeta = LegacyDirectMonotonicReader.loadMeta(meta, entry.count + 1, blockShift);
+                ramBytesUsed.addAndGet(entry.addressesMeta.ramBytesUsed());
+                directAddressesMeta.put(info.name, entry.addressesMeta);
+                entry.addressesEndOffset = meta.readLong();
+                break;
+            default:
+                throw new CorruptIndexException("Unknown format: " + entry.format, meta);
+        }
+        return entry;
+    }
+
+    /**
+     * Reads the shared metadata for a sorted-set / sorted-numeric field.
+     * Table-encoded fields additionally carry a small dictionary of ords
+     * (at most 256 entries) plus per-set offsets into that dictionary.
+     */
+    SortedSetEntry readSortedSetEntry(IndexInput meta) throws IOException {
+        SortedSetEntry entry = new SortedSetEntry();
+        entry.format = meta.readVInt();
+        if (entry.format == Lucene54DocValuesFormat.SORTED_SET_TABLE) {
+            final int totalTableLength = meta.readInt();
+            if (totalTableLength > 256) {
+                throw new CorruptIndexException(
+                    "SORTED_SET_TABLE cannot have more than 256 values in its dictionary, got=" + totalTableLength,
+                    meta
+                );
+            }
+            entry.table = new long[totalTableLength];
+            for (int i = 0; i < totalTableLength; ++i) {
+                entry.table[i] = meta.readLong();
+            }
+            ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.table));
+            final int tableSize = meta.readInt();
+            if (tableSize > totalTableLength + 1) { // +1 because of the empty set
+                throw new CorruptIndexException(
+                    "SORTED_SET_TABLE cannot have more set ids than ords in its dictionary, got "
+                        + totalTableLength
+                        + " ords and "
+                        + tableSize
+                        + " sets",
+                    meta
+                );
+            }
+            // offsets are delta-encoded on disk; rebuild absolute offsets via prefix sum
+            entry.tableOffsets = new int[tableSize + 1];
+            for (int i = 1; i < entry.tableOffsets.length; ++i) {
+                entry.tableOffsets[i] = entry.tableOffsets[i - 1] + meta.readInt();
+            }
+            ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(entry.tableOffsets));
+        } else if (entry.format != Lucene54DocValuesFormat.SORTED_SINGLE_VALUED
+            && entry.format != Lucene54DocValuesFormat.SORTED_WITH_ADDRESSES) {
+                throw new CorruptIndexException("Unknown format: " + entry.format, meta);
+            }
+        return entry;
+    }
+
+    /**
+     * Returns an iterator over the numeric values of {@code field}.
+     * Sparse fields are iterated via their explicit doc-id list; dense fields
+     * combine a random-access {@link LongValues} with "docs with field" bits.
+     */
+    @Override
+    public NumericDocValues getNumeric(FieldInfo field) throws IOException {
+        NumericEntry entry = numerics.get(field.name);
+        Bits docsWithField;
+
+        if (entry.format == Lucene54DocValuesFormat.SPARSE_COMPRESSED) {
+            // sparse: the docs that have a value are stored explicitly
+            return getSparseNumericDocValues(entry);
+        } else {
+            if (entry.missingOffset == Lucene54DocValuesFormat.ALL_MISSING) {
+                // no document has a value
+                return DocValues.emptyNumeric();
+            } else if (entry.missingOffset == Lucene54DocValuesFormat.ALL_LIVE) {
+                // every document has a value: a plain counting iterator suffices
+                LongValues values = getNumeric(entry);
+                return new NumericDocValues() {
+                    private int docID = -1;
+
+                    @Override
+                    public int docID() {
+                        return docID;
+                    }
+
+                    @Override
+                    public int nextDoc() {
+                        docID++;
+                        if (docID == maxDoc) {
+                            docID = NO_MORE_DOCS;
+                        }
+                        return docID;
+                    }
+
+                    @Override
+                    public int advance(int target) {
+                        if (target >= maxDoc) {
+                            docID = NO_MORE_DOCS;
+                        } else {
+                            docID = target;
+                        }
+                        return docID;
+                    }
+
+                    @Override
+                    public boolean advanceExact(int target) throws IOException {
+                        // all docs have a value, so every in-range target matches
+                        docID = target;
+                        return true;
+                    }
+
+                    @Override
+                    public long cost() {
+                        // TODO
+                        return 0;
+                    }
+
+                    @Override
+                    public long longValue() {
+                        return values.get(docID);
+                    }
+                };
+            } else {
+                docsWithField = getLiveBits(entry.missingOffset, maxDoc);
+            }
+        }
+        final LongValues values = getNumeric(entry);
+        return new NumericDocValues() {
+
+            int doc = -1;
+            long value;
+
+            @Override
+            public long longValue() throws IOException {
+                return value;
+            }
+
+            @Override
+            public int docID() {
+                return doc;
+            }
+
+            @Override
+            public int nextDoc() throws IOException {
+                return advance(doc + 1);
+            }
+
+            @Override
+            public int advance(int target) throws IOException {
+                // a stored 0 only counts as a value if the docsWithField bits say so
+                for (int doc = target; doc < maxDoc; ++doc) {
+                    value = values.get(doc);
+                    if (value != 0 || docsWithField.get(doc)) {
+                        return this.doc = doc;
+                    }
+                }
+                return doc = NO_MORE_DOCS;
+            }
+
+            @Override
+            public boolean advanceExact(int target) throws IOException {
+                doc = target;
+                value = values.get(doc);
+                return value != 0 || docsWithField.get(doc);
+            }
+
+            @Override
+            public long cost() {
+                return maxDoc;
+            }
+
+        };
+    }
+
+    /** Verifies the stored checksum over the entire data file. */
+    @Override
+    public void checkIntegrity() throws IOException {
+        CodecUtil.checksumEntireFile(data);
+    }
+
+    /** Debug representation, e.g. {@code Lucene54DocValuesProducer(fields=12)}. */
+    @Override
+    public String toString() {
+        final StringBuilder description = new StringBuilder(getClass().getSimpleName());
+        description.append("(fields=").append(numFields).append(')');
+        return description.toString();
+    }
+
+    /**
+     * Returns a random-access view of a numeric entry. The returned
+     * {@link LongValues} decodes values on the fly according to the
+     * entry's compression format.
+     */
+    LongValues getNumeric(NumericEntry entry) throws IOException {
+        switch (entry.format) {
+            case Lucene54DocValuesFormat.CONST_COMPRESSED: {
+                // one shared constant; the live bits decide which docs actually have it
+                final long constant = entry.minValue;
+                final Bits live = getLiveBits(entry.missingOffset, (int) entry.count);
+                return new LongValues() {
+                    @Override
+                    public long get(long index) {
+                        return live.get((int) index) ? constant : 0;
+                    }
+                };
+            }
+            case Lucene54DocValuesFormat.DELTA_COMPRESSED: {
+                // values stored as packed offsets from the minimum value
+                RandomAccessInput slice = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
+                final long delta = entry.minValue;
+                final LongValues values = LegacyDirectReader.getInstance(slice, entry.bitsPerValue, 0);
+                return new LongValues() {
+                    @Override
+                    public long get(long id) {
+                        return delta + values.get(id);
+                    }
+                };
+            }
+            case Lucene54DocValuesFormat.GCD_COMPRESSED: {
+                // values stored as quotients of their greatest common divisor
+                RandomAccessInput slice = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
+                final long min = entry.minValue;
+                final long mult = entry.gcd;
+                final LongValues quotientReader = LegacyDirectReader.getInstance(slice, entry.bitsPerValue, 0);
+                return new LongValues() {
+                    @Override
+                    public long get(long id) {
+                        return min + mult * quotientReader.get(id);
+                    }
+                };
+            }
+            case Lucene54DocValuesFormat.TABLE_COMPRESSED: {
+                // small set of unique values referenced by packed ordinal
+                RandomAccessInput slice = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
+                final long table[] = entry.table;
+                final LongValues ords = LegacyDirectReader.getInstance(slice, entry.bitsPerValue, 0);
+                return new LongValues() {
+                    @Override
+                    public long get(long id) {
+                        return table[(int) ords.get(id)];
+                    }
+                };
+            }
+            case Lucene54DocValuesFormat.SPARSE_COMPRESSED:
+                // wrap the sparse iterator so it can be used as random access;
+                // the missing value depends on whether the numbers are ords or values
+                final SparseNumericDocValues values = getSparseNumericDocValues(entry);
+                final long missingValue;
+                switch (entry.numberType) {
+                    case ORDINAL:
+                        missingValue = -1L;
+                        break;
+                    case VALUE:
+                        missingValue = 0L;
+                        break;
+                    default:
+                        throw new AssertionError();
+                }
+                return new SparseNumericDocValuesRandomAccessWrapper(values, missingValue);
+            default:
+                throw new AssertionError();
+        }
+    }
+
+    /**
+     * Iterator over a SPARSE_COMPRESSED numeric field: the documents that have a
+     * value are listed explicitly (ascending) in {@code docIds}, with their values
+     * stored in parallel in {@code values}.
+     */
+    static final class SparseNumericDocValues extends NumericDocValues {
+
+        final int docIDsLength;
+        final LongValues docIds, values;
+
+        // index: current position in docIds/values; doc: current doc id (-1 / NO_MORE_DOCS at the ends)
+        int index, doc;
+
+        SparseNumericDocValues(int docIDsLength, LongValues docIDs, LongValues values) {
+            this.docIDsLength = docIDsLength;
+            this.docIds = docIDs;
+            this.values = values;
+            reset();
+        }
+
+        /** Rewinds the iterator so it can be reused for another pass. */
+        void reset() {
+            index = -1;
+            doc = -1;
+        }
+
+        @Override
+        public int docID() {
+            return doc;
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+            if (index >= docIDsLength - 1) {
+                index = docIDsLength;
+                return doc = NO_MORE_DOCS;
+            }
+            return doc = (int) docIds.get(++index);
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+            long loIndex = index;
+            long step = 1;
+            long hiIndex;
+            int hiDoc;
+
+            // gallop forward by exponentially growing the interval
+            // in order to find an interval so that the target doc
+            // is in ]lo, hi]. Compared to a regular binary search,
+            // this optimizes the case that the caller performs many
+            // advance calls by small deltas
+            do {
+                hiIndex = index + step;
+                if (hiIndex >= docIDsLength) {
+                    hiIndex = docIDsLength;
+                    hiDoc = NO_MORE_DOCS;
+                    break;
+                }
+                hiDoc = (int) docIds.get(hiIndex);
+                if (hiDoc >= target) {
+                    break;
+                }
+                step <<= 1;
+            } while (true);
+
+            // now binary search
+            while (loIndex + 1 < hiIndex) {
+                final long midIndex = (loIndex + 1 + hiIndex) >>> 1;
+                final int midDoc = (int) docIds.get(midIndex);
+                if (midDoc >= target) {
+                    hiIndex = midIndex;
+                    hiDoc = midDoc;
+                } else {
+                    loIndex = midIndex;
+                }
+            }
+
+            index = (int) hiIndex;
+            return doc = hiDoc;
+        }
+
+        @Override
+        public boolean advanceExact(int target) throws IOException {
+            if (advance(target) == target) {
+                return true;
+            }
+            // advance() moved strictly past the previous position; stepping back one
+            // appears to cover the case where the iterator was already on target
+            // before the call — NOTE(review): confirm against upstream Lucene
+            --index;
+            doc = target;
+            return index >= 0 && docIds.get(index) == target;
+        }
+
+        @Override
+        public long longValue() {
+            assert index >= 0;
+            assert index < docIDsLength;
+            return values.get(index);
+        }
+
+        @Override
+        public long cost() {
+            return docIDsLength;
+        }
+    }
+
+    /**
+     * Presents a {@link SparseNumericDocValues} iterator as random-access
+     * {@link LongValues}; documents without a value report {@code missingValue}.
+     * Backwards (or repeated) lookups force a reset() and a re-advance from the
+     * start, so forward-only access patterns are the efficient ones.
+     */
+    static class SparseNumericDocValuesRandomAccessWrapper extends LongValues {
+
+        final SparseNumericDocValues values;
+        final long missingValue;
+
+        SparseNumericDocValuesRandomAccessWrapper(SparseNumericDocValues values, long missingValue) {
+            this.values = values;
+            this.missingValue = missingValue;
+        }
+
+        @Override
+        public long get(long longIndex) {
+            final int index = Math.toIntExact(longIndex);
+            int doc = values.docID();
+            if (doc >= index) {
+                // iterator is already at or past the requested doc: start over
+                values.reset();
+            }
+            assert values.docID() < index;
+            try {
+                doc = values.advance(index);
+            } catch (IOException e) {
+                // LongValues.get cannot throw, so tunnel the IOException
+                throw new RuntimeException(e);
+            }
+            if (doc == index) {
+                return values.longValue();
+            } else {
+                return missingValue;
+            }
+        }
+
+    }
+
+    LegacyBinaryDocValues getLegacyBinary(FieldInfo field) throws IOException {
+        BinaryEntry bytes = binaries.get(field.name);
+        switch (bytes.format) {
+            case Lucene54DocValuesFormat.BINARY_FIXED_UNCOMPRESSED:
+                return getFixedBinary(field, bytes);
+            case Lucene54DocValuesFormat.BINARY_VARIABLE_UNCOMPRESSED:
+                return getVariableBinary(field, bytes);
+            case Lucene54DocValuesFormat.BINARY_PREFIX_COMPRESSED:
+                return getCompressedBinary(field, bytes);
+            default:
+                throw new AssertionError();
+        }
+    }
+
+    /** Returns an iterator view over a binary field, honoring its missing-docs bits. */
+    @Override
+    public BinaryDocValues getBinary(FieldInfo field) throws IOException {
+        final BinaryEntry entry = binaries.get(field.name);
+        final Bits docsWithField = getLiveBits(entry.missingOffset, maxDoc);
+        final LegacyBinaryDocValues legacyValues = getLegacyBinary(field);
+        return new LegacyBinaryDocValuesWrapper(docsWithField, legacyValues);
+    }
+
+    /**
+     * Random-access view over fixed-length binary values.
+     * NOTE: the returned {@link BytesRef} is reused across {@code get} calls.
+     */
+    private LegacyBinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
+        final IndexInput data = this.data.slice("fixed-binary", bytes.offset, bytes.count * bytes.maxLength);
+
+        final BytesRef term = new BytesRef(bytes.maxLength);
+        final byte[] buffer = term.bytes;
+        final int length = term.length = bytes.maxLength;
+
+        return new LongBinaryDocValues() {
+            @Override
+            public BytesRef get(long id) {
+                try {
+                    // fixed-size values make the offset a simple multiply
+                    data.seek(id * length);
+                    data.readBytes(buffer, 0, buffer.length);
+                    return term;
+                } catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        };
+    }
+
+    /**
+     * Random-access view over variable-length binary values. Start/end offsets
+     * come from a monotonic addresses structure (count + 1 entries); the bytes
+     * themselves live in a separate slice. NOTE: the returned {@link BytesRef}
+     * is reused across {@code get} calls.
+     */
+    private LegacyBinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
+        final RandomAccessInput addressesData = this.data.randomAccessSlice(
+            bytes.addressesOffset,
+            bytes.addressesEndOffset - bytes.addressesOffset
+        );
+        final LongValues addresses = LegacyDirectMonotonicReader.getInstance(bytes.addressesMeta, addressesData);
+
+        final IndexInput data = this.data.slice("var-binary", bytes.offset, bytes.addressesOffset - bytes.offset);
+        // buffer is sized for the longest value so every read fits
+        final BytesRef term = new BytesRef(Math.max(0, bytes.maxLength));
+        final byte buffer[] = term.bytes;
+
+        return new LongBinaryDocValues() {
+            @Override
+            public BytesRef get(long id) {
+                long startAddress = addresses.get(id);
+                long endAddress = addresses.get(id + 1);
+                int length = (int) (endAddress - startAddress);
+                try {
+                    data.seek(startAddress);
+                    data.readBytes(buffer, 0, length);
+                    term.length = length;
+                    return term;
+                } catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        };
+    }
+
+    /**
+     * Returns an address instance for prefix-compressed binary values.
+     * Instances are cached per field; the cache (and its RAM accounting) is
+     * skipped while merging, since merge-time readers are short-lived.
+     */
+    private synchronized MonotonicBlockPackedReader getIntervalInstance(FieldInfo field, BinaryEntry bytes) throws IOException {
+        MonotonicBlockPackedReader addresses = addressInstances.get(field.name);
+        if (addresses == null) {
+            data.seek(bytes.addressesOffset);
+            // rounded-up number of fixed-size term intervals
+            final long size = (bytes.count + Lucene54DocValuesFormat.INTERVAL_MASK) >>> Lucene54DocValuesFormat.INTERVAL_SHIFT;
+            addresses = MonotonicBlockPackedReader.of(data, bytes.packedIntsVersion, bytes.blockSize, size);
+            if (merging == false) {
+                addressInstances.put(field.name, addresses);
+                ramBytesUsed.addAndGet(addresses.ramBytesUsed() + Integer.BYTES);
+            }
+        }
+        return addresses;
+    }
+
+    /**
+     * Returns a reverse lookup instance for prefix-compressed binary values:
+     * term addresses plus a frozen copy of the reverse-index term bytes.
+     * Cached per field, except while merging (same policy as the address cache).
+     */
+    private synchronized ReverseTermsIndex getReverseIndexInstance(FieldInfo field, BinaryEntry bytes) throws IOException {
+        ReverseTermsIndex index = reverseIndexInstances.get(field.name);
+        if (index == null) {
+            index = new ReverseTermsIndex();
+            data.seek(bytes.reverseIndexOffset);
+            // rounded-up number of reverse-index intervals
+            long size = (bytes.count + Lucene54DocValuesFormat.REVERSE_INTERVAL_MASK) >>> Lucene54DocValuesFormat.REVERSE_INTERVAL_SHIFT;
+            index.termAddresses = MonotonicBlockPackedReader.of(data, bytes.packedIntsVersion, bytes.blockSize, size);
+            long dataSize = data.readVLong();
+            PagedBytes pagedBytes = new PagedBytes(15);
+            pagedBytes.copy(data, dataSize);
+            index.terms = pagedBytes.freeze(true);
+            if (merging == false) {
+                reverseIndexInstances.put(field.name, index);
+                ramBytesUsed.addAndGet(index.ramBytesUsed());
+            }
+        }
+        return index;
+    }
+
+    /**
+     * Builds the term-dictionary-style view over prefix-compressed binary
+     * values, combining interval addresses, the reverse index, and the raw
+     * terms slice.
+     */
+    private LegacyBinaryDocValues getCompressedBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
+        final MonotonicBlockPackedReader intervalAddresses = getIntervalInstance(field, bytes);
+        final ReverseTermsIndex reverseIndex = getReverseIndexInstance(field, bytes);
+        assert intervalAddresses.size() > 0; // we don't have to handle empty case
+        final IndexInput termsSlice = data.slice("terms", bytes.offset, bytes.addressesOffset - bytes.offset);
+        return new CompressedBinaryDocValues(bytes, intervalAddresses, reverseIndex, termsSlice);
+    }
+
+    /**
+     * Returns an iterator over a sorted (single-valued ordinal) field.
+     * Ordinals come from the field's numeric entry; term bytes come from the
+     * field's binary entry. Sparse fields delegate iteration to the sparse
+     * doc-id list, dense fields treat ord -1 as "no value".
+     */
+    @Override
+    public SortedDocValues getSorted(FieldInfo field) throws IOException {
+        final int valueCount = (int) binaries.get(field.name).count;
+        final LegacyBinaryDocValues binary = getLegacyBinary(field);
+        NumericEntry entry = ords.get(field.name);
+        final LongValues ordinals = getNumeric(entry);
+        if (entry.format == Lucene54DocValuesFormat.SPARSE_COMPRESSED) {
+            // the ords are wrapped sparse values: iterate the underlying doc-id list
+            final SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) ordinals).values;
+            return new SortedDocValues() {
+
+                @Override
+                public int ordValue() {
+                    return (int) sparseValues.longValue();
+                }
+
+                @Override
+                public BytesRef lookupOrd(int ord) {
+                    return binary.get(ord);
+                }
+
+                @Override
+                public int getValueCount() {
+                    return valueCount;
+                }
+
+                @Override
+                public int docID() {
+                    return sparseValues.docID();
+                }
+
+                @Override
+                public int nextDoc() throws IOException {
+                    return sparseValues.nextDoc();
+                }
+
+                @Override
+                public int advance(int target) throws IOException {
+                    return sparseValues.advance(target);
+                }
+
+                @Override
+                public boolean advanceExact(int target) throws IOException {
+                    return sparseValues.advanceExact(target);
+                }
+
+                @Override
+                public long cost() {
+                    return sparseValues.cost();
+                }
+
+            };
+        }
+        return new SortedDocValues() {
+            private int docID = -1;
+            private int ord;
+
+            @Override
+            public int docID() {
+                return docID;
+            }
+
+            @Override
+            public int nextDoc() throws IOException {
+                assert docID != NO_MORE_DOCS;
+                // skip docs whose ord is -1 (no value)
+                while (true) {
+                    docID++;
+                    if (docID == maxDoc) {
+                        docID = NO_MORE_DOCS;
+                        break;
+                    }
+                    ord = (int) ordinals.get(docID);
+                    if (ord != -1) {
+                        break;
+                    }
+                }
+                return docID;
+            }
+
+            @Override
+            public int advance(int target) throws IOException {
+                if (target >= maxDoc) {
+                    docID = NO_MORE_DOCS;
+                    return docID;
+                } else {
+                    // position just before target and let nextDoc apply the skip logic
+                    docID = target - 1;
+                    return nextDoc();
+                }
+            }
+
+            @Override
+            public boolean advanceExact(int target) throws IOException {
+                docID = target;
+                ord = (int) ordinals.get(target);
+                return ord != -1;
+            }
+
+            @Override
+            public int ordValue() {
+                return ord;
+            }
+
+            @Override
+            public long cost() {
+                // TODO
+                return 0;
+            }
+
+            @Override
+            public BytesRef lookupOrd(int ord) {
+                return binary.get(ord);
+            }
+
+            @Override
+            public int getValueCount() {
+                return valueCount;
+            }
+
+            @Override
+            public int lookupTerm(BytesRef key) throws IOException {
+                // prefix-compressed terms support direct term seeking
+                if (binary instanceof CompressedBinaryDocValues) {
+                    return (int) ((CompressedBinaryDocValues) binary).lookupTerm(key);
+                } else {
+                    return super.lookupTerm(key);
+                }
+            }
+
+            @Override
+            public TermsEnum termsEnum() throws IOException {
+                if (binary instanceof CompressedBinaryDocValues) {
+                    return ((CompressedBinaryDocValues) binary).getTermsEnum();
+                } else {
+                    return super.termsEnum();
+                }
+            }
+        };
+    }
+
+    /** returns an address instance for sortedset ordinal lists */
+    private LongValues getOrdIndexInstance(FieldInfo field, NumericEntry entry) throws IOException {
+        RandomAccessInput data = this.data.randomAccessSlice(entry.offset, entry.endOffset - entry.offset);
+        return LegacyDirectMonotonicReader.getInstance(entry.monotonicMeta, data);
+    }
+
+    @Override
+    public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
+        SortedSetEntry ss = sortedNumerics.get(field.name);
+        if (ss.format == Lucene54DocValuesFormat.SORTED_SINGLE_VALUED) {
+            NumericEntry numericEntry = numerics.get(field.name);
+            final LongValues values = getNumeric(numericEntry);
+            if (numericEntry.format == Lucene54DocValuesFormat.SPARSE_COMPRESSED) {
+                SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values;
+                return new SortedNumericDocValues() {
+
+                    @Override
+                    public long nextValue() throws IOException {
+                        return sparseValues.longValue();
+                    }
+
+                    @Override
+                    public int docValueCount() {
+                        return 1;
+                    }
+
+                    @Override
+                    public int docID() {
+                        return sparseValues.docID();
+                    }
+
+                    @Override
+                    public int nextDoc() throws IOException {
+                        return sparseValues.nextDoc();
+                    }
+
+                    @Override
+                    public int advance(int target) throws IOException {
+                        return sparseValues.advance(target);
+                    }
+
+                    @Override
+                    public boolean advanceExact(int target) throws IOException {
+                        return sparseValues.advanceExact(target);
+                    }
+
+                    @Override
+                    public long cost() {
+                        return sparseValues.cost();
+                    }
+
+                };
+            }
+            final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
+            return new SortedNumericDocValues() {
+                int docID = -1;
+
+                @Override
+                public int docID() {
+                    return docID;
+                }
+
+                @Override
+                public int nextDoc() {
+                    while (true) {
+                        docID++;
+                        if (docID == maxDoc) {
+                            docID = NO_MORE_DOCS;
+                            break;
+                        }
+
+                        if (docsWithField.get(docID)) {
+                            // TODO: use .nextSetBit here, at least!!
+                            break;
+                        }
+                    }
+                    return docID;
+                }
+
+                @Override
+                public int advance(int target) {
+                    if (target >= maxDoc) {
+                        docID = NO_MORE_DOCS;
+                        return docID;
+                    } else {
+                        docID = target - 1;
+                        return nextDoc();
+                    }
+                }
+
+                @Override
+                public boolean advanceExact(int target) throws IOException {
+                    docID = target;
+                    return docsWithField.get(docID);
+                }
+
+                @Override
+                public long cost() {
+                    // TODO
+                    return 0;
+                }
+
+                @Override
+                public int docValueCount() {
+                    return 1;
+                }
+
+                @Override
+                public long nextValue() {
+                    return values.get(docID);
+                }
+            };
+        } else if (ss.format == Lucene54DocValuesFormat.SORTED_WITH_ADDRESSES) {
+            NumericEntry numericEntry = numerics.get(field.name);
+            final LongValues values = getNumeric(numericEntry);
+            final LongValues ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name));
+
+            return new SortedNumericDocValues() {
+                long startOffset;
+                long endOffset;
+                int docID = -1;
+                long upto;
+
+                @Override
+                public int docID() {
+                    return docID;
+                }
+
+                @Override
+                public int nextDoc() {
+                    while (true) {
+                        docID++;
+                        if (docID == maxDoc) {
+                            docID = NO_MORE_DOCS;
+                            return docID;
+                        }
+                        startOffset = ordIndex.get(docID);
+                        endOffset = ordIndex.get(docID + 1L);
+                        if (endOffset > startOffset) {
+                            break;
+                        }
+                    }
+                    upto = startOffset;
+                    return docID;
+                }
+
+                @Override
+                public int advance(int target) {
+                    if (target >= maxDoc) {
+                        docID = NO_MORE_DOCS;
+                        return docID;
+                    } else {
+                        docID = target - 1;
+                        return nextDoc();
+                    }
+                }
+
+                @Override
+                public boolean advanceExact(int target) throws IOException {
+                    docID = target;
+                    startOffset = ordIndex.get(docID);
+                    endOffset = ordIndex.get(docID + 1L);
+                    upto = startOffset;
+                    return endOffset > startOffset;
+                }
+
+                @Override
+                public long cost() {
+                    // TODO
+                    return 0;
+                }
+
+                @Override
+                public int docValueCount() {
+                    return (int) (endOffset - startOffset);
+                }
+
+                @Override
+                public long nextValue() {
+                    return values.get(upto++);
+                }
+            };
+        } else if (ss.format == Lucene54DocValuesFormat.SORTED_SET_TABLE) {
+            NumericEntry entry = ords.get(field.name);
+            final LongValues ordinals = getNumeric(entry);
+
+            final long[] table = ss.table;
+            final int[] offsets = ss.tableOffsets;
+            return new SortedNumericDocValues() {
+                int startOffset;
+                int endOffset;
+                int docID = -1;
+                int upto;
+
+                @Override
+                public int docID() {
+                    return docID;
+                }
+
+                @Override
+                public int nextDoc() {
+                    while (true) {
+                        docID++;
+                        if (docID == maxDoc) {
+                            docID = NO_MORE_DOCS;
+                            return docID;
+                        }
+                        int ord = (int) ordinals.get(docID);
+                        startOffset = offsets[ord];
+                        endOffset = offsets[ord + 1];
+                        if (endOffset > startOffset) {
+                            break;
+                        }
+                    }
+                    upto = startOffset;
+                    return docID;
+                }
+
+                @Override
+                public int advance(int target) {
+                    if (target >= maxDoc) {
+                        docID = NO_MORE_DOCS;
+                        return docID;
+                    } else {
+                        docID = target - 1;
+                        return nextDoc();
+                    }
+                }
+
+                @Override
+                public boolean advanceExact(int target) throws IOException {
+                    docID = target;
+                    int ord = (int) ordinals.get(docID);
+                    startOffset = offsets[ord];
+                    endOffset = offsets[ord + 1];
+                    upto = startOffset;
+                    return endOffset > startOffset;
+                }
+
+                @Override
+                public long cost() {
+                    // TODO: return a real cost estimate (DocIdSetIterator contract) instead of 0
+                    return 0;
+                }
+
+                @Override
+                public int docValueCount() {
+                    return endOffset - startOffset;
+                }
+
+                @Override
+                public long nextValue() {
+                    return table[upto++];
+                }
+            };
+        } else {
+            throw new AssertionError();
+        }
+    }
+
+    @Override
+    public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
+        SortedSetEntry ss = sortedSets.get(field.name);
+        switch (ss.format) {
+            case Lucene54DocValuesFormat.SORTED_SINGLE_VALUED:
+                return DocValues.singleton(getSorted(field));
+            case Lucene54DocValuesFormat.SORTED_WITH_ADDRESSES:
+                return getSortedSetWithAddresses(field);
+            case Lucene54DocValuesFormat.SORTED_SET_TABLE:
+                return getSortedSetTable(field, ss);
+            default:
+                throw new AssertionError();
+        }
+    }
+
+    private SortedSetDocValues getSortedSetWithAddresses(FieldInfo field) throws IOException {
+        final long valueCount = binaries.get(field.name).count;
+        // we keep the byte[]s and list of ords on disk, these could be large
+        final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field);
+        final LongValues ordinals = getNumeric(ords.get(field.name));
+        // but the addresses to the ord stream are in RAM
+        final LongValues ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name));
+
+        return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() {
+            long startOffset;
+            long offset;
+            long endOffset;
+
+            @Override
+            public long nextOrd() {
+                if (offset == endOffset) {
+                    return NO_MORE_ORDS;
+                } else {
+                    long ord = ordinals.get(offset);
+                    offset++;
+                    return ord;
+                }
+            }
+
+            @Override
+            public void setDocument(int docID) {
+                startOffset = offset = ordIndex.get(docID);
+                endOffset = ordIndex.get(docID + 1L);
+            }
+
+            @Override
+            public BytesRef lookupOrd(long ord) {
+                return binary.get(ord);
+            }
+
+            @Override
+            public long getValueCount() {
+                return valueCount;
+            }
+
+            @Override
+            public long lookupTerm(BytesRef key) {
+                if (binary instanceof CompressedBinaryDocValues) {
+                    return ((CompressedBinaryDocValues) binary).lookupTerm(key);
+                } else {
+                    return super.lookupTerm(key);
+                }
+            }
+
+            @Override
+            public TermsEnum termsEnum() throws IOException {
+                if (binary instanceof CompressedBinaryDocValues) {
+                    return ((CompressedBinaryDocValues) binary).getTermsEnum();
+                } else {
+                    return super.termsEnum();
+                }
+            }
+        }, maxDoc);
+    }
+
+    private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException {
+        final long valueCount = binaries.get(field.name).count;
+        final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field);
+        final NumericEntry ordinalsEntry = ords.get(field.name);
+        final LongValues ordinals = getNumeric(ordinalsEntry);
+
+        final long[] table = ss.table;
+        final int[] offsets = ss.tableOffsets;
+
+        return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() {
+
+            int offset, startOffset, endOffset;
+
+            @Override
+            public void setDocument(int docID) {
+                final int ord = (int) ordinals.get(docID);
+                offset = startOffset = offsets[ord];
+                endOffset = offsets[ord + 1];
+            }
+
+            @Override
+            public long nextOrd() {
+                if (offset == endOffset) {
+                    return NO_MORE_ORDS;
+                } else {
+                    return table[offset++];
+                }
+            }
+
+            @Override
+            public BytesRef lookupOrd(long ord) {
+                return binary.get(ord);
+            }
+
+            @Override
+            public long getValueCount() {
+                return valueCount;
+            }
+
+            @Override
+            public long lookupTerm(BytesRef key) {
+                if (binary instanceof CompressedBinaryDocValues) {
+                    return ((CompressedBinaryDocValues) binary).lookupTerm(key);
+                } else {
+                    return super.lookupTerm(key);
+                }
+            }
+
+            @Override
+            public TermsEnum termsEnum() throws IOException {
+                if (binary instanceof CompressedBinaryDocValues) {
+                    return ((CompressedBinaryDocValues) binary).getTermsEnum();
+                } else {
+                    return super.termsEnum();
+                }
+            }
+        }, maxDoc);
+    }
+
+    private Bits getLiveBits(final long offset, final int count) throws IOException {
+        if (offset == Lucene54DocValuesFormat.ALL_MISSING) {
+            return new Bits.MatchNoBits(count);
+        } else if (offset == Lucene54DocValuesFormat.ALL_LIVE) {
+            return new Bits.MatchAllBits(count);
+        } else {
+            int length = (int) ((count + 7L) >>> 3);
+            final RandomAccessInput in = data.randomAccessSlice(offset, length);
+            return new Bits() {
+                @Override
+                public boolean get(int index) {
+                    try {
+                        return (in.readByte(index >> 3) & (1 << (index & 7))) != 0;
+                    } catch (IOException e) {
+                        throw new RuntimeException(e);
+                    }
+                }
+
+                @Override
+                public int length() {
+                    return count;
+                }
+            };
+        }
+    }
+
+    private SparseNumericDocValues getSparseNumericDocValues(NumericEntry entry) throws IOException {
+        final RandomAccessInput docIdsData = this.data.randomAccessSlice(entry.missingOffset, entry.offset - entry.missingOffset);
+        final LongValues docIDs = LegacyDirectMonotonicReader.getInstance(entry.monotonicMeta, docIdsData);
+        final LongValues values = getNumeric(entry.nonMissingValues); // cannot be sparse
+        return new SparseNumericDocValues(Math.toIntExact(entry.numDocsWithValue), docIDs, values);
+    }
+
+    @Override
+    public synchronized DocValuesProducer getMergeInstance() {
+        return new Lucene54DocValuesProducer(this);
+    }
+
+    @Override
+    public void close() throws IOException {
+        data.close();
+    }
+
+    /** metadata entry for a numeric docvalues field */
+    static class NumericEntry {
+        private NumericEntry() {}
+
+        /** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
+        long missingOffset;
+        /** offset to the actual numeric values */
+        public long offset;
+        /** end offset to the actual numeric values */
+        public long endOffset;
+        /** bits per value used to pack the numeric values */
+        public int bitsPerValue;
+
+        int format;
+        /** count of values written */
+        public long count;
+
+        /** metadata for the monotonic reader that decodes the sparse doc-ID stream */
+        public LegacyDirectMonotonicReader.Meta monotonicMeta;
+
+        long minValue;
+        long gcd;
+        long table[];
+
+        /** for sparse compression */
+        long numDocsWithValue;
+        NumericEntry nonMissingValues;
+        Lucene54DocValuesConsumer.NumberType numberType;
+    }
+
+    /** metadata entry for a binary docvalues field */
+    static class BinaryEntry {
+        private BinaryEntry() {}
+
+        /** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
+        long missingOffset;
+        /** offset to the actual binary values */
+        long offset;
+
+        int format;
+        /** count of values written */
+        public long count;
+        int minLength;
+        int maxLength;
+        /** offset to the addressing data that maps a value to its slice of the byte[] */
+        public long addressesOffset, addressesEndOffset;
+        /** meta data for addresses */
+        public LegacyDirectMonotonicReader.Meta addressesMeta;
+        /** offset to the reverse index */
+        public long reverseIndexOffset;
+        /** packed ints version used to encode addressing information */
+        public int packedIntsVersion;
+        /** packed ints blocksize */
+        public int blockSize;
+    }
+
+    /** metadata entry for a sorted-set docvalues field */
+    static class SortedSetEntry {
+        private SortedSetEntry() {}
+
+        int format;
+
+        long[] table;
+        int[] tableOffsets;
+    }
+
+    // internally we compose complex dv (sorted/sortedset) from other ones
+    abstract static class LongBinaryDocValues extends LegacyBinaryDocValues {
+        @Override
+        public final BytesRef get(int docID) {
+            return get((long) docID);
+        }
+
+        abstract BytesRef get(long id);
+    }
+
+    // used for reverse lookup to a small range of blocks
+    static class ReverseTermsIndex implements Accountable {
+        public MonotonicBlockPackedReader termAddresses;
+        public PagedBytes.Reader terms;
+
+        @Override
+        public long ramBytesUsed() {
+            return termAddresses.ramBytesUsed() + terms.ramBytesUsed();
+        }
+
+        @Override
+        public Collection<Accountable> getChildResources() {
+            List<Accountable> resources = new ArrayList<>();
+            resources.add(Accountables.namedAccountable("term bytes", terms));
+            resources.add(Accountables.namedAccountable("term addresses", termAddresses));
+            return Collections.unmodifiableList(resources);
+        }
+
+        @Override
+        public String toString() {
+            return getClass().getSimpleName() + "(size=" + termAddresses.size() + ")";
+        }
+    }
+
+    // in the compressed case, we add a few additional operations for
+    // more efficient reverse lookup and enumeration
+    static final class CompressedBinaryDocValues extends LongBinaryDocValues {
+        final long numValues;
+        final long numIndexValues;
+        final int maxTermLength;
+        final MonotonicBlockPackedReader addresses;
+        final IndexInput data;
+        final CompressedBinaryTermsEnum termsEnum;
+        final PagedBytes.Reader reverseTerms;
+        final MonotonicBlockPackedReader reverseAddresses;
+        final long numReverseIndexValues;
+
+        CompressedBinaryDocValues(BinaryEntry bytes, MonotonicBlockPackedReader addresses, ReverseTermsIndex index, IndexInput data)
+            throws IOException {
+            this.maxTermLength = bytes.maxLength;
+            this.numValues = bytes.count;
+            this.addresses = addresses;
+            this.numIndexValues = addresses.size();
+            this.data = data;
+            this.reverseTerms = index.terms;
+            this.reverseAddresses = index.termAddresses;
+            this.numReverseIndexValues = reverseAddresses.size();
+            this.termsEnum = getTermsEnum(data);
+        }
+
+        @Override
+        public BytesRef get(long id) {
+            try {
+                termsEnum.seekExact(id);
+                return termsEnum.term();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        long lookupTerm(BytesRef key) {
+            try {
+                switch (termsEnum.seekCeil(key)) {
+                    case FOUND:
+                        return termsEnum.ord();
+                    case NOT_FOUND:
+                        return -termsEnum.ord() - 1;
+                    default:
+                        return -numValues - 1;
+                }
+            } catch (IOException bogus) {
+                throw new RuntimeException(bogus);
+            }
+        }
+
+        TermsEnum getTermsEnum() throws IOException {
+            return getTermsEnum(data.clone());
+        }
+
+        private CompressedBinaryTermsEnum getTermsEnum(IndexInput input) throws IOException {
+            return new CompressedBinaryTermsEnum(input);
+        }
+
+        class CompressedBinaryTermsEnum extends BaseTermsEnum {
+            private long currentOrd = -1;
+            // offset to the start of the current block
+            private long currentBlockStart;
+            private final IndexInput input;
+            // delta from currentBlockStart to start of each term
+            private final int offsets[] = new int[Lucene54DocValuesFormat.INTERVAL_COUNT];
+            private final byte buffer[] = new byte[2 * Lucene54DocValuesFormat.INTERVAL_COUNT - 1];
+
+            private final BytesRef term = new BytesRef(maxTermLength);
+            private final BytesRef firstTerm = new BytesRef(maxTermLength);
+            private final BytesRef scratch = new BytesRef();
+
+            CompressedBinaryTermsEnum(IndexInput input) throws IOException {
+                this.input = input;
+                input.seek(0);
+            }
+
+            private void readHeader() throws IOException {
+                firstTerm.length = input.readVInt();
+                input.readBytes(firstTerm.bytes, 0, firstTerm.length);
+                input.readBytes(buffer, 0, Lucene54DocValuesFormat.INTERVAL_COUNT - 1);
+                if (buffer[0] == -1) {
+                    readShortAddresses();
+                } else {
+                    readByteAddresses();
+                }
+                currentBlockStart = input.getFilePointer();
+            }
+
+            // read single byte addresses: each is delta - 2
+            // (shared prefix byte and length > 0 are both implicit)
+            private void readByteAddresses() throws IOException {
+                int addr = 0;
+                for (int i = 1; i < offsets.length; i++) {
+                    addr += 2 + (buffer[i - 1] & 0xFF);
+                    offsets[i] = addr;
+                }
+            }
+
+            // read double byte addresses: each is delta - 2
+            // (shared prefix byte and length > 0 are both implicit)
+            private void readShortAddresses() throws IOException {
+                input.readBytes(buffer, Lucene54DocValuesFormat.INTERVAL_COUNT - 1, Lucene54DocValuesFormat.INTERVAL_COUNT);
+                int addr = 0;
+                for (int i = 1; i < offsets.length; i++) {
+                    int x = i << 1;
+                    addr += 2 + ((buffer[x - 1] << 8) | (buffer[x] & 0xFF));
+                    offsets[i] = addr;
+                }
+            }
+
+            // set term to the first term
+            private void readFirstTerm() throws IOException {
+                term.length = firstTerm.length;
+                System.arraycopy(firstTerm.bytes, firstTerm.offset, term.bytes, 0, term.length);
+            }
+
+            // read term at offset, delta encoded from first term
+            private void readTerm(int offset) throws IOException {
+                int start = input.readByte() & 0xFF;
+                System.arraycopy(firstTerm.bytes, firstTerm.offset, term.bytes, 0, start);
+                int suffix = offsets[offset] - offsets[offset - 1] - 1;
+                input.readBytes(term.bytes, start, suffix);
+                term.length = start + suffix;
+            }
+
+            @Override
+            public BytesRef next() throws IOException {
+                currentOrd++;
+                if (currentOrd >= numValues) {
+                    return null;
+                } else {
+                    int offset = (int) (currentOrd & Lucene54DocValuesFormat.INTERVAL_MASK);
+                    if (offset == 0) {
+                        // switch to next block
+                        readHeader();
+                        readFirstTerm();
+                    } else {
+                        readTerm(offset);
+                    }
+                    return term;
+                }
+            }
+
+            // binary search reverse index to find smaller
+            // range of blocks to search
+            long binarySearchIndex(BytesRef text) throws IOException {
+                long low = 0;
+                long high = numReverseIndexValues - 1;
+                while (low <= high) {
+                    long mid = (low + high) >>> 1;
+                    reverseTerms.fill(scratch, reverseAddresses.get(mid));
+                    int cmp = scratch.compareTo(text);
+
+                    if (cmp < 0) {
+                        low = mid + 1;
+                    } else if (cmp > 0) {
+                        high = mid - 1;
+                    } else {
+                        return mid;
+                    }
+                }
+                return high;
+            }
+
+            // binary search against first term in block range
+            // to find term's block
+            long binarySearchBlock(BytesRef text, long low, long high) throws IOException {
+                while (low <= high) {
+                    long mid = (low + high) >>> 1;
+                    input.seek(addresses.get(mid));
+                    term.length = input.readVInt();
+                    input.readBytes(term.bytes, 0, term.length);
+                    int cmp = term.compareTo(text);
+
+                    if (cmp < 0) {
+                        low = mid + 1;
+                    } else if (cmp > 0) {
+                        high = mid - 1;
+                    } else {
+                        return mid;
+                    }
+                }
+                return high;
+            }
+
+            @Override
+            public SeekStatus seekCeil(BytesRef text) throws IOException {
+                // locate block: narrow to block range with index, then search blocks
+                final long block;
+                long indexPos = binarySearchIndex(text);
+                if (indexPos < 0) {
+                    block = 0;
+                } else {
+                    long low = indexPos << Lucene54DocValuesFormat.BLOCK_INTERVAL_SHIFT;
+                    long high = Math.min(numIndexValues - 1, low + Lucene54DocValuesFormat.BLOCK_INTERVAL_MASK);
+                    block = Math.max(low, binarySearchBlock(text, low, high));
+                }
+
+                // position before block, then scan to term.
+                input.seek(addresses.get(block));
+                currentOrd = (block << Lucene54DocValuesFormat.INTERVAL_SHIFT) - 1;
+
+                while (next() != null) {
+                    int cmp = term.compareTo(text);
+                    if (cmp == 0) {
+                        return SeekStatus.FOUND;
+                    } else if (cmp > 0) {
+                        return SeekStatus.NOT_FOUND;
+                    }
+                }
+                return SeekStatus.END;
+            }
+
+            @Override
+            public void seekExact(long ord) throws IOException {
+                long block = ord >>> Lucene54DocValuesFormat.INTERVAL_SHIFT;
+                if (block != currentOrd >>> Lucene54DocValuesFormat.INTERVAL_SHIFT) {
+                    // switch to different block
+                    input.seek(addresses.get(block));
+                    readHeader();
+                }
+
+                currentOrd = ord;
+
+                int offset = (int) (ord & Lucene54DocValuesFormat.INTERVAL_MASK);
+                if (offset == 0) {
+                    readFirstTerm();
+                } else {
+                    input.seek(currentBlockStart + offsets[offset - 1]);
+                    readTerm(offset);
+                }
+            }
+
+            @Override
+            public BytesRef term() throws IOException {
+                return term;
+            }
+
+            @Override
+            public long ord() throws IOException {
+                return currentOrd;
+            }
+
+            @Override
+            public int docFreq() throws IOException {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public long totalTermFreq() throws IOException {
+                return -1;
+            }
+
+            @Override
+            public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
+                throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public ImpactsEnum impacts(int flags) throws IOException {
+                throw new UnsupportedOperationException();
+            }
+
+        }
+    }
+}

+ 16 - 1
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java

@@ -24,12 +24,15 @@ import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
 import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.LiveDocsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
 import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec;
 import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.Lucene50SegmentInfoFormat;
+import org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat;
 
 import java.util.Objects;
 
@@ -44,8 +47,14 @@ public class Lucene60Codec extends BWCCodec {
     private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene50SegmentInfoFormat());
     private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
     private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
-
     private final StoredFieldsFormat storedFieldsFormat;
+    private final DocValuesFormat defaultDocValuesFormat = new Lucene54DocValuesFormat();
+    private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
+        @Override
+        public DocValuesFormat getDocValuesFormatForField(String field) {
+            return defaultDocValuesFormat;
+        }
+    };
 
     /**
      * Instantiates a new codec.
@@ -89,4 +98,10 @@ public class Lucene60Codec extends BWCCodec {
     public final CompoundFormat compoundFormat() {
         return compoundFormat;
     }
+
+    @Override
+    public DocValuesFormat docValuesFormat() {
+        return docValuesFormat;
+    }
+
 }

+ 15 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java

@@ -24,11 +24,14 @@ import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
 import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.LiveDocsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
 import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec;
+import org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat;
 
 import java.util.Objects;
 
@@ -44,6 +47,13 @@ public class Lucene62Codec extends BWCCodec {
     private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
     private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
     private final StoredFieldsFormat storedFieldsFormat;
+    private final DocValuesFormat defaultDocValuesFormat = new Lucene54DocValuesFormat();
+    private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
+        @Override
+        public DocValuesFormat getDocValuesFormatForField(String field) {
+            return defaultDocValuesFormat;
+        }
+    };
 
     public Lucene62Codec() {
         this(Lucene50StoredFieldsFormat.Mode.BEST_SPEED);
@@ -78,4 +88,9 @@ public class Lucene62Codec extends BWCCodec {
     public final CompoundFormat compoundFormat() {
         return compoundFormat;
     }
+
+    @Override
+    public DocValuesFormat docValuesFormat() {
+        return docValuesFormat;
+    }
 }

+ 14 - 0
x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java

@@ -15,10 +15,12 @@ import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
 import org.apache.lucene.backward_codecs.lucene70.Lucene70SegmentInfoFormat;
 import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.LiveDocsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
 import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec;
 
 public class BWCLucene70Codec extends BWCCodec {
@@ -28,6 +30,13 @@ public class BWCLucene70Codec extends BWCCodec {
     private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
     private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
     private final StoredFieldsFormat storedFieldsFormat;
+    private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene70");
+    private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
+        @Override
+        public DocValuesFormat getDocValuesFormatForField(String field) {
+            return defaultDVFormat;
+        }
+    };
 
     public BWCLucene70Codec() {
         super("BWCLucene70Codec");
@@ -58,4 +67,9 @@ public class BWCLucene70Codec extends BWCCodec {
     public CompoundFormat compoundFormat() {
         return compoundFormat;
     }
+
+    @Override
+    public final DocValuesFormat docValuesFormat() {
+        return docValuesFormat;
+    }
 }

+ 16 - 0
x-pack/plugin/old-lucene-versions/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat

@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.elasticsearch.xpack.lucene.bwc.codecs.lucene54.Lucene54DocValuesFormat

+ 22 - 0
x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesFormatTests.java

@@ -0,0 +1,22 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.lucene.bwc.codecs.lucene54;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseDocValuesFormatTestCase;
+import org.apache.lucene.util.TestUtil;
+
+public class Lucene54DocValuesFormatTests extends BaseDocValuesFormatTestCase {
+
+    private final Codec codec = TestUtil.alwaysDocValuesFormat(new Lucene54DocValuesFormat());
+
+    @Override
+    protected Codec getCodec() {
+        return codec;
+    }
+}

+ 85 - 17
x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java

@@ -25,10 +25,12 @@ import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.RestClient;
 import org.elasticsearch.client.RestHighLevelClient;
 import org.elasticsearch.client.indices.CloseIndexRequest;
+import org.elasticsearch.client.indices.PutMappingRequest;
 import org.elasticsearch.client.searchable_snapshots.MountSnapshotRequest;
 import org.elasticsearch.cluster.SnapshotsInProgress;
 import org.elasticsearch.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.SecureString;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
@@ -37,14 +39,20 @@ import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
+import org.elasticsearch.search.sort.SortBuilders;
+import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.snapshots.SnapshotInfo;
 import org.elasticsearch.snapshots.SnapshotState;
 import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
 import org.elasticsearch.test.rest.ESRestTestCase;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.json.JsonXContent;
 
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -90,7 +98,8 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
         );
 
         int oldEsPort = Integer.parseInt(System.getProperty("tests.es.port"));
-        int numDocs = 5;
+        int numDocs = 10;
+        int extraDocs = 1;
         final Set<String> expectedIds = new HashSet<>();
         try (
             RestHighLevelClient client = highLevelClient(adminClient());
@@ -99,12 +108,23 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
             try {
                 Request createIndex = new Request("PUT", "/test");
                 int numberOfShards = randomIntBetween(1, 3);
-                createIndex.setJsonEntity("""
-                    {"settings":{"number_of_shards": %s}}
-                    """.formatted(numberOfShards));
+
+                XContentBuilder settingsBuilder = XContentFactory.jsonBuilder().startObject().startObject("settings");
+                settingsBuilder.field("index.number_of_shards", numberOfShards);
+
+                // 6.5.0 started using soft-deletes, but it was only enabled by default on 7.0
+                if (oldVersion.onOrAfter(Version.fromString("6.5.0"))
+                    && oldVersion.before(Version.fromString("7.0.0"))
+                    && randomBoolean()) {
+                    settingsBuilder.field("index.soft_deletes.enabled", true);
+                }
+
+                settingsBuilder.endObject().endObject();
+
+                createIndex.setJsonEntity(Strings.toString(settingsBuilder));
                 assertOK(oldEs.performRequest(createIndex));
 
-                for (int i = 0; i < numDocs; i++) {
+                for (int i = 0; i < numDocs + extraDocs; i++) {
                     String id = "testdoc" + i;
                     expectedIds.add(id);
                     Request doc = new Request("PUT", "/test/doc/" + id);
@@ -113,6 +133,14 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
                     assertOK(oldEs.performRequest(doc));
                 }
 
+                for (int i = 0; i < extraDocs; i++) {
+                    String id = randomFrom(expectedIds);
+                    expectedIds.remove(id);
+                    Request doc = new Request("DELETE", "/test/doc/" + id);
+                    doc.addParameter("refresh", "true");
+                    oldEs.performRequest(doc);
+                }
+
                 // register repo on old ES and take snapshot
                 Request createRepoRequest = new Request("PUT", "/_snapshot/testrepo");
                 createRepoRequest.setJsonEntity(sourceOnlyRepository ? """
@@ -190,7 +218,7 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
 
                 if (Build.CURRENT.isSnapshot()) {
                     // restore / mount and check whether searches work
-                    restoreMountAndVerify(numDocs, expectedIds, client, numberOfShards);
+                    restoreMountAndVerify(numDocs, expectedIds, client, numberOfShards, sourceOnlyRepository);
 
                     // close indices
                     assertTrue(
@@ -208,7 +236,7 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
                     );
 
                     // restore / mount again
-                    restoreMountAndVerify(numDocs, expectedIds, client, numberOfShards);
+                    restoreMountAndVerify(numDocs, expectedIds, client, numberOfShards, sourceOnlyRepository);
                 }
             } finally {
                 IOUtils.closeWhileHandlingException(
@@ -233,8 +261,13 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
     }
 
     @SuppressWarnings("removal")
-    private void restoreMountAndVerify(int numDocs, Set<String> expectedIds, RestHighLevelClient client, int numberOfShards)
-        throws IOException {
+    private void restoreMountAndVerify(
+        int numDocs,
+        Set<String> expectedIds,
+        RestHighLevelClient client,
+        int numberOfShards,
+        boolean sourceOnlyRepository
+    ) throws IOException {
         // restore index
         RestoreSnapshotResponse restoreSnapshotResponse = client.snapshot()
             .restore(
@@ -259,7 +292,7 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
         );
 
         // run a search against the index
-        assertDocs("restored_test", numDocs, expectedIds, client);
+        assertDocs("restored_test", numDocs, expectedIds, client, sourceOnlyRepository);
 
         // mount as full copy searchable snapshot
         RestoreSnapshotResponse mountSnapshotResponse = client.searchableSnapshots()
@@ -285,7 +318,7 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
         );
 
         // run a search against the index
-        assertDocs("mounted_full_copy_test", numDocs, expectedIds, client);
+        assertDocs("mounted_full_copy_test", numDocs, expectedIds, client, sourceOnlyRepository);
 
         // mount as shared cache searchable snapshot
         mountSnapshotResponse = client.searchableSnapshots()
@@ -300,11 +333,12 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
         assertEquals(numberOfShards, mountSnapshotResponse.getRestoreInfo().successfulShards());
 
         // run a search against the index
-        assertDocs("mounted_shared_cache_test", numDocs, expectedIds, client);
+        assertDocs("mounted_shared_cache_test", numDocs, expectedIds, client, sourceOnlyRepository);
     }
 
     @SuppressWarnings("removal")
-    private void assertDocs(String index, int numDocs, Set<String> expectedIds, RestHighLevelClient client) throws IOException {
+    private void assertDocs(String index, int numDocs, Set<String> expectedIds, RestHighLevelClient client, boolean sourceOnlyRepository)
+        throws IOException {
         // run a search against the index
         SearchResponse searchResponse = client.search(new SearchRequest(index), RequestOptions.DEFAULT);
         logger.info(searchResponse);
@@ -318,21 +352,55 @@ public class OldRepositoryAccessIT extends ESRestTestCase {
         assertTrue(Arrays.stream(searchResponse.getHits().getHits()).allMatch(SearchHit::hasSource));
         // check that correct _source present for each document
         for (SearchHit h : searchResponse.getHits().getHits()) {
-            assertEquals(sourceForDoc(Integer.parseInt(h.getId().substring("testdoc".length()))), h.getSourceAsString());
+            assertEquals(sourceForDoc(getIdAsNumeric(h.getId())), h.getSourceAsString());
         }
 
+        String id = randomFrom(expectedIds);
+        int num = getIdAsNumeric(id);
         // run a search using runtime fields against the index
         searchResponse = client.search(
             new SearchRequest(index).source(
                 SearchSourceBuilder.searchSource()
-                    .query(QueryBuilders.matchQuery("val", 2))
+                    .query(QueryBuilders.matchQuery("val", num))
                     .runtimeMappings(Map.of("val", Map.of("type", "long")))
             ),
             RequestOptions.DEFAULT
         );
         logger.info(searchResponse);
         assertEquals(1, searchResponse.getHits().getTotalHits().value);
-        assertEquals("testdoc2", searchResponse.getHits().getHits()[0].getId());
-        assertEquals(sourceForDoc(2), searchResponse.getHits().getHits()[0].getSourceAsString());
+        assertEquals(id, searchResponse.getHits().getHits()[0].getId());
+        assertEquals(sourceForDoc(num), searchResponse.getHits().getHits()[0].getSourceAsString());
+
+        if (sourceOnlyRepository == false) {
+            // check that doc values can be accessed by (reverse) sorting on numeric val field
+            // first add mapping for field (this will be done automatically in the future)
+            XContentBuilder mappingBuilder = JsonXContent.contentBuilder();
+            mappingBuilder.startObject().startObject("properties").startObject("val");
+            mappingBuilder.field("type", "long");
+            mappingBuilder.endObject().endObject().endObject();
+            assertTrue(
+                client.indices().putMapping(new PutMappingRequest(index).source(mappingBuilder), RequestOptions.DEFAULT).isAcknowledged()
+            );
+
+            // search using reverse sort on val
+            searchResponse = client.search(
+                new SearchRequest(index).source(
+                    SearchSourceBuilder.searchSource()
+                        .query(QueryBuilders.matchAllQuery())
+                        .sort(SortBuilders.fieldSort("val").order(SortOrder.DESC))
+                ),
+                RequestOptions.DEFAULT
+            );
+            logger.info(searchResponse);
+            // check sort order
+            assertEquals(
+                expectedIds.stream().sorted(Comparator.comparingInt(this::getIdAsNumeric).reversed()).collect(Collectors.toList()),
+                Arrays.stream(searchResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toList())
+            );
+        }
+    }
+
+    // Extracts the numeric suffix from ids of the form "testdoc<N>".
+    // NOTE(review): assumes every id starts with the "testdoc" prefix — throws
+    // NumberFormatException otherwise, which is acceptable in this test context.
+    private int getIdAsNumeric(String id) {
+        return Integer.parseInt(id.substring("testdoc".length()));
     }
 }