|
|
@@ -7,7 +7,6 @@
|
|
|
|
|
|
package org.elasticsearch.xpack.lucene.bwc.codecs;
|
|
|
|
|
|
-import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec;
|
|
|
import org.apache.lucene.codecs.Codec;
|
|
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
|
|
import org.apache.lucene.codecs.FieldsConsumer;
|
|
|
@@ -18,6 +17,7 @@ import org.apache.lucene.codecs.NormsProducer;
|
|
|
import org.apache.lucene.codecs.PostingsFormat;
|
|
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
|
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
|
|
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
|
|
import org.apache.lucene.index.FieldInfo;
|
|
|
import org.apache.lucene.index.FieldInfos;
|
|
|
import org.apache.lucene.index.Fields;
|
|
|
@@ -27,7 +27,12 @@ import org.apache.lucene.index.SegmentWriteState;
|
|
|
import org.apache.lucene.index.Terms;
|
|
|
import org.apache.lucene.store.Directory;
|
|
|
import org.apache.lucene.store.IOContext;
|
|
|
+import org.apache.lucene.util.Version;
|
|
|
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec;
|
|
|
+import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec;
|
|
|
+import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec;
|
|
|
+import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec;
|
|
|
+import org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
import java.util.ArrayList;
|
|
|
@@ -39,55 +44,122 @@ import java.util.List;
|
|
|
*/
|
|
|
public abstract class BWCCodec extends Codec {
|
|
|
|
|
|
+ private final FieldInfosFormat fieldInfosFormat;
|
|
|
+ private final SegmentInfoFormat segmentInfosFormat;
|
|
|
+ private final PostingsFormat postingsFormat;
|
|
|
+
|
|
|
protected BWCCodec(String name) {
|
|
|
super(name);
|
|
|
- }
|
|
|
|
|
|
- @Override
|
|
|
- public NormsFormat normsFormat() {
|
|
|
- throw new UnsupportedOperationException();
|
|
|
- }
|
|
|
+ this.fieldInfosFormat = new FieldInfosFormat() {
|
|
|
+ final FieldInfosFormat wrappedFormat = originalFieldInfosFormat();
|
|
|
|
|
|
- @Override
|
|
|
- public TermVectorsFormat termVectorsFormat() {
|
|
|
- throw new UnsupportedOperationException();
|
|
|
- }
|
|
|
+ @Override
|
|
|
+ public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
|
|
|
+ throws IOException {
|
|
|
+ return filterFields(wrappedFormat.read(directory, segmentInfo, segmentSuffix, iocontext));
|
|
|
+ }
|
|
|
|
|
|
- @Override
|
|
|
- public KnnVectorsFormat knnVectorsFormat() {
|
|
|
- throw new UnsupportedOperationException();
|
|
|
- }
|
|
|
+ @Override
|
|
|
+ public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
|
|
|
+ throws IOException {
|
|
|
+ wrappedFormat.write(directory, segmentInfo, segmentSuffix, infos, context);
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ this.segmentInfosFormat = new SegmentInfoFormat() {
|
|
|
+ final SegmentInfoFormat wrappedFormat = originalSegmentInfoFormat();
|
|
|
|
|
|
- protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) {
|
|
|
- return new SegmentInfoFormat() {
|
|
|
@Override
|
|
|
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
|
|
|
- return wrap(wrapped.read(directory, segmentName, segmentID, context));
|
|
|
+ return wrap(wrappedFormat.read(directory, segmentName, segmentID, context));
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
|
|
|
- wrapped.write(dir, info, ioContext);
|
|
|
+ wrappedFormat.write(dir, info, ioContext);
|
|
|
}
|
|
|
};
|
|
|
- }
|
|
|
|
|
|
- protected static FieldInfosFormat wrap(FieldInfosFormat wrapped) {
|
|
|
- return new FieldInfosFormat() {
|
|
|
+ this.postingsFormat = new PerFieldPostingsFormat() {
|
|
|
@Override
|
|
|
- public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
|
|
|
- throws IOException {
|
|
|
- return filterFields(wrapped.read(directory, segmentInfo, segmentSuffix, iocontext));
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
|
|
|
- throws IOException {
|
|
|
- wrapped.write(directory, segmentInfo, segmentSuffix, infos, context);
|
|
|
+ public PostingsFormat getPostingsFormatForField(String field) {
|
|
|
+ throw new UnsupportedOperationException("Old codecs can't be used for writing");
|
|
|
}
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ @Override
|
|
|
+ public final FieldInfosFormat fieldInfosFormat() {
|
|
|
+ return fieldInfosFormat;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public final SegmentInfoFormat segmentInfoFormat() {
|
|
|
+ return segmentInfosFormat;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public PostingsFormat postingsFormat() {
|
|
|
+ return postingsFormat;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
|
|
|
+ * This method is never called in practice, as we rewrite field infos to override the info about which features are present in
|
|
|
+ * the index. Even if norms are present, field info lies about it.
|
|
|
+ *
|
|
|
+ * @return nothing, as this method always throws an exception
|
|
|
+ * @throws UnsupportedOperationException always thrown to indicate that this method is not supported
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public final NormsFormat normsFormat() {
|
|
|
+ throw new UnsupportedOperationException();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
|
|
|
+ * This method is never called in practice, as we rewrite field infos to override the info about which features are present in
|
|
|
+ * the index. Even if term vectors are present, field info lies about it.
|
|
|
+ *
|
|
|
+ * @return nothing, as this method always throws an exception
|
|
|
+ * @throws UnsupportedOperationException always thrown to indicate that this method is not supported
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public final TermVectorsFormat termVectorsFormat() {
|
|
|
+ throw new UnsupportedOperationException();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
|
|
|
+ * The knn vectors can't be present because it is not supported yet in any of the lucene versions that we support for archive indices.
|
|
|
+ *
|
|
|
+ * @return nothing, as this method always throws an exception
|
|
|
+ * @throws UnsupportedOperationException always thrown to indicate that this method is not supported
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public final KnnVectorsFormat knnVectorsFormat() {
|
|
|
+ throw new UnsupportedOperationException();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the original {@link SegmentInfoFormat} used by this codec.
|
|
|
+ * This method should be implemented by subclasses to provide the specific
|
|
|
+ * {@link SegmentInfoFormat} that this codec is intended to use.
|
|
|
+ *
|
|
|
+ * @return the original {@link SegmentInfoFormat} used by this codec
|
|
|
+ */
|
|
|
+ protected abstract SegmentInfoFormat originalSegmentInfoFormat();
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the original {@link FieldInfosFormat} used by this codec.
|
|
|
+ * This method should be implemented by subclasses to provide the specific
|
|
|
+ * {@link FieldInfosFormat} that this codec is intended to use.
|
|
|
+ *
|
|
|
+ * @return the original {@link FieldInfosFormat} used by this codec
|
|
|
+ */
|
|
|
+ protected abstract FieldInfosFormat originalFieldInfosFormat();
|
|
|
+
|
|
|
// mark all fields as no term vectors, no norms, no payloads, and no vectors.
|
|
|
private static FieldInfos filterFields(FieldInfos fieldInfos) {
|
|
|
List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
|
|
|
@@ -119,15 +191,14 @@ public abstract class BWCCodec extends Codec {
|
|
|
}
|
|
|
|
|
|
public static SegmentInfo wrap(SegmentInfo segmentInfo) {
|
|
|
- // special handling for Lucene70Codec (which is currently bundled with Lucene)
|
|
|
- // Use BWCLucene70Codec instead as that one extends BWCCodec (similar to all other older codecs)
|
|
|
- final Codec codec = segmentInfo.getCodec() instanceof Lucene70Codec ? new BWCLucene70Codec() : segmentInfo.getCodec();
|
|
|
+ Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec());
|
|
|
+
|
|
|
final SegmentInfo segmentInfo1 = new SegmentInfo(
|
|
|
segmentInfo.dir,
|
|
|
// Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation)
|
|
|
// TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed?
|
|
|
- org.apache.lucene.util.Version.LATEST,
|
|
|
- org.apache.lucene.util.Version.LATEST,
|
|
|
+ Version.LATEST,
|
|
|
+ Version.LATEST,
|
|
|
segmentInfo.name,
|
|
|
segmentInfo.maxDoc(),
|
|
|
segmentInfo.getUseCompoundFile(),
|
|
|
@@ -142,6 +213,28 @@ public abstract class BWCCodec extends Codec {
|
|
|
return segmentInfo1;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Returns a backward-compatible codec for the given codec. If the codec is one of the known Lucene 8.x codecs,
|
|
|
+ * it returns a corresponding read-only backward-compatible codec. Otherwise, it returns the original codec.
|
|
|
+ * Lucene 8.x codecs are still shipped with the current version of Lucene.
|
|
|
+ * Earlier codecs we are providing directly they will also be read-only backward-compatible, but they don't require the renaming.
|
|
|
+ *
|
|
|
+ * This switch is only for indices created in ES 6.x, later written into in ES 7.x (Lucene 8.x). Indices created
|
|
|
+ * in ES 7.x can be read directly by ES if marked read-only, without going through archive indices.
|
|
|
+ */
|
|
|
+ private static Codec getBackwardCompatibleCodec(Codec codec) {
|
|
|
+ if (codec == null) return null;
|
|
|
+
|
|
|
+ return switch (codec.getClass().getSimpleName()) {
|
|
|
+ case "Lucene70Codec" -> new BWCLucene70Codec();
|
|
|
+ case "Lucene80Codec" -> new BWCLucene80Codec();
|
|
|
+ case "Lucene84Codec" -> new BWCLucene84Codec();
|
|
|
+ case "Lucene86Codec" -> new BWCLucene86Codec();
|
|
|
+ case "Lucene87Codec" -> new BWCLucene87Codec();
|
|
|
+ default -> codec;
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* In-memory postings format that shows no postings available.
|
|
|
*/
|