瀏覽代碼

Fix PerFieldFormatSupplier mistake (#134008)

Correctly use seqno_field_use_tsdb_doc_values_format feature flag to ensure 
_seq_no doesn't get excluded.

Closes #133875
Closes #133993
Martijn van Groningen 1 月之前
父節點
當前提交
5e7159e32b

+ 0 - 6
muted-tests.yml

@@ -513,12 +513,6 @@ tests:
 - class: org.elasticsearch.xpack.ml.integration.InferenceIT
   method: testInferClassificationModel
   issue: https://github.com/elastic/elasticsearch/issues/133448
-- class: org.elasticsearch.xpack.logsdb.LogsIndexingIT
-  method: testRouteOnSortFields
-  issue: https://github.com/elastic/elasticsearch/issues/133993
-- class: org.elasticsearch.xpack.logsdb.LogsIndexingIT
-  method: testShrink
-  issue: https://github.com/elastic/elasticsearch/issues/133875
 - class: org.elasticsearch.xpack.esql.action.CrossClusterQueryWithPartialResultsIT
   method: testPartialResults
   issue: https://github.com/elastic/elasticsearch/issues/131481

+ 24 - 8
server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java

@@ -27,15 +27,37 @@ import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.SeqNoFieldMapper;
+import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
+import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
 
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
 /**
  * Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and
  * vectors.
  */
 public class PerFieldFormatSupplier {
 
-    private static final FeatureFlag SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT = new FeatureFlag("seqno_field_use_tsdb_doc_values_format");
+    static final FeatureFlag SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT = new FeatureFlag("seqno_field_use_tsdb_doc_values_format");
+    private static final Set<String> INCLUDE_META_FIELDS;
+
+    static {
+        // TODO: should we just allow all fields to use tsdb doc values codec?
+        // Avoid using tsdb codec for fields like _seq_no, _primary_term.
+        // But _tsid and _ts_routing_hash should always use the tsdb codec.
+        Set<String> includeMetaField = new HashSet<>(3);
+        includeMetaField.add(TimeSeriesIdFieldMapper.NAME);
+        includeMetaField.add(TimeSeriesRoutingHashFieldMapper.NAME);
+        if (SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT.isEnabled()) {
+            includeMetaField.add(SeqNoFieldMapper.NAME);
+        }
+        // Don't the include _recovery_source_size and _recovery_source fields, since their values can be trimmed away in
+        // RecoverySourcePruneMergePolicy, which leads to inconsistencies between merge stats and actual values.
+        INCLUDE_META_FIELDS = Collections.unmodifiableSet(includeMetaField);
+    }
 
     private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
     private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
@@ -126,13 +148,7 @@ public class PerFieldFormatSupplier {
     }
 
     private boolean excludeFields(String fieldName) {
-        // TODO: should we just allow all fields to use tsdb doc values codec?
-        // Avoid using tsdb codec for fields like _seq_no, _primary_term.
-        // But _tsid and _ts_routing_hash should always use the tsdb codec.
-        return fieldName.startsWith("_")
-            && fieldName.equals("_tsid") == false
-            && fieldName.equals("_ts_routing_hash") == false
-            && (SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT.isEnabled() && fieldName.equals(SeqNoFieldMapper.NAME) == false);
+        return fieldName.startsWith("_") && INCLUDE_META_FIELDS.contains(fieldName) == false;
     }
 
     private boolean isTimeSeriesModeIndex() {

+ 31 - 0
server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java

@@ -21,6 +21,10 @@ import org.elasticsearch.index.MapperTestUtils;
 import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
 import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
 import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.SeqNoFieldMapper;
+import org.elasticsearch.index.mapper.SourceFieldMapper;
+import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
+import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
@@ -201,6 +205,33 @@ public class PerFieldMapperCodecTests extends ESTestCase {
         assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("response_size")), is(true));
     }
 
+    public void testMetaFields() throws IOException {
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, IndexMode.LOGSDB, MAPPING_3);
+        assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(TimeSeriesIdFieldMapper.NAME)), is(true));
+        assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(TimeSeriesRoutingHashFieldMapper.NAME)), is(true));
+        // See: PerFieldFormatSupplier why these fields shouldn't use tsdb codec
+        assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SourceFieldMapper.RECOVERY_SOURCE_NAME)), is(false));
+        assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SourceFieldMapper.RECOVERY_SOURCE_SIZE_NAME)), is(false));
+    }
+
+    public void testSeqnoField() throws IOException {
+        assumeTrue(
+            "seqno_field_use_tsdb_doc_values_format should be enabled",
+            PerFieldFormatSupplier.SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT.isEnabled()
+        );
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, IndexMode.LOGSDB, MAPPING_3);
+        assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SeqNoFieldMapper.NAME)), is(true));
+    }
+
+    public void testSeqnoFieldFeatureFlagDisabled() throws IOException {
+        assumeTrue(
+            "seqno_field_use_tsdb_doc_values_format should be disabled",
+            PerFieldFormatSupplier.SEQNO_FIELD_USE_TSDB_DOC_VALUES_FORMAT.isEnabled() == false
+        );
+        PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, IndexMode.LOGSDB, MAPPING_3);
+        assertThat((perFieldMapperCodec.useTSDBDocValuesFormat(SeqNoFieldMapper.NAME)), is(false));
+    }
+
     private PerFieldFormatSupplier createFormatSupplier(boolean enableES87TSDBCodec, IndexMode mode, String mapping) throws IOException {
         Settings.Builder settings = Settings.builder();
         settings.put(IndexSettings.MODE.getKey(), mode);