فهرست منبع

Remove unnecessary optimizations for TermsSetQueryBuilder (#67637)

Relates #67223
gf2121 4 سال پیش
والد
کامیت
7063d00263

+ 13 - 100
server/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java

@@ -29,12 +29,8 @@ import org.apache.lucene.search.LongValues;
 import org.apache.lucene.search.LongValuesSource;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParsingException;
-import org.elasticsearch.common.bytes.BytesReference;
-import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.lucene.BytesRefs;
@@ -47,17 +43,12 @@ import org.elasticsearch.script.Script;
 import org.elasticsearch.script.TermsSetQueryScript;
 
 import java.io.IOException;
-import java.nio.CharBuffer;
 import java.util.AbstractList;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
-import java.util.Set;
 import java.util.stream.Collectors;
 
 public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQueryBuilder> {
@@ -75,8 +66,17 @@ public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQue
     private Script minimumShouldMatchScript;
 
     public TermsSetQueryBuilder(String fieldName, List<?> values) {
+        this(fieldName, values, true);
+    }
+
+    private TermsSetQueryBuilder(String fieldName, List<?> values, boolean convert) {
         this.fieldName = Objects.requireNonNull(fieldName);
-        this.values = convert(Objects.requireNonNull(values));
+        Objects.requireNonNull(values);
+        if (convert) {
+            this.values = values.stream().map(AbstractQueryBuilder::maybeConvertToBytesRef).collect(Collectors.toList());
+        } else {
+            this.values = values;
+        }
     }
 
     public TermsSetQueryBuilder(StreamInput in) throws IOException {
@@ -220,7 +220,7 @@ public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQue
             throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
         }
 
-        TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(fieldName, values)
+        TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(fieldName, values, false)
                 .queryName(queryName).boost(boost);
         if (minimumShouldMatchField != null) {
             queryBuilder.setMinimumShouldMatchField(minimumShouldMatchField);
@@ -353,19 +353,10 @@ public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQue
 
     }
 
-    //TODO: remove these optimizations in a follow up (https://github.com/elastic/elasticsearch/pull/67223)
-    private static final Set<Class<? extends Number>> INTEGER_TYPES = new HashSet<>(
-        Arrays.asList(Byte.class, Short.class, Integer.class, Long.class));
-    private static final Set<Class<?>> STRING_TYPES = new HashSet<>(
-        Arrays.asList(BytesRef.class, String.class));
-
     /**
      * Convert the internal {@link List} of values back to a user-friendly list.
-     * Integers are kept as-is since the terms query does not make any difference
-     * between {@link Integer}s and {@link Long}s, but {@link BytesRef}s are
-     * converted back to {@link String}s.
      */
-    static List<Object> convertBack(List<?> list) {
+    private static List<Object> convertBack(List<?> list) {
         return new AbstractList<Object>() {
             @Override
             public int size() {
@@ -373,89 +364,11 @@ public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQue
             }
             @Override
             public Object get(int index) {
-                Object o = list.get(index);
-                if (o instanceof BytesRef) {
-                    o = ((BytesRef) o).utf8ToString();
-                }
-                // we do not convert longs, all integer types are equivalent
-                // as far as this query is concerned
-                return o;
+                return maybeConvertToString(list.get(index));
             }
         };
     }
 
-    /**
-     * Convert the list in a way that optimizes storage in the case that all
-     * elements are either integers or {@link String}s/{@link BytesRef}/
-     * {@link CharBuffer}s. This is useful to help garbage collections for
-     * use-cases that involve sending very large terms queries to Elasticsearch.
-     * If the list does not only contain integers or {@link String}s, then a
-     * list is returned where all {@link String}/{@link CharBuffer}s have been
-     * replaced with {@link BytesRef}s.
-     */
-    static List<?> convert(List<?> list) {
-        if (list.isEmpty()) {
-            return Collections.emptyList();
-        }
-
-        final boolean allNumbers = list.stream().allMatch(o -> o != null && INTEGER_TYPES.contains(o.getClass()));
-        if (allNumbers) {
-            final long[] elements = list.stream().mapToLong(o -> ((Number) o).longValue()).toArray();
-            return new AbstractList<Object>() {
-                @Override
-                public Object get(int index) {
-                    return elements[index];
-                }
-                @Override
-                public int size() {
-                    return elements.length;
-                }
-            };
-        }
-
-        final boolean allStrings = list.stream().allMatch(o -> o != null && STRING_TYPES.contains(o.getClass()));
-        if (allStrings) {
-            final BytesRefBuilder builder = new BytesRefBuilder();
-            try (BytesStreamOutput bytesOut = new BytesStreamOutput()) {
-                final int[] endOffsets = new int[list.size()];
-                int i = 0;
-                for (Object o : list) {
-                    BytesRef b;
-                    if (o instanceof BytesRef) {
-                        b = (BytesRef) o;
-                    } else if (o instanceof CharBuffer) {
-                        b = new BytesRef((CharBuffer) o);
-                    } else {
-                        builder.copyChars(o.toString());
-                        b = builder.get();
-                    }
-                    bytesOut.writeBytes(b.bytes, b.offset, b.length);
-                    if (i == 0) {
-                        endOffsets[0] = b.length;
-                    } else {
-                        endOffsets[i] = Math.addExact(endOffsets[i-1], b.length);
-                    }
-                    ++i;
-                }
-                final BytesReference bytes = bytesOut.bytes();
-                return new AbstractList<Object>() {
-                    @Override
-                    public Object get(int i) {
-                        final int startOffset = i == 0 ? 0 : endOffsets[i-1];
-                        final int endOffset = endOffsets[i];
-                        return bytes.slice(startOffset, endOffset - startOffset).toBytesRef();
-                    }
-                    @Override
-                    public int size() {
-                        return endOffsets.length;
-                    }
-                };
-            }
-        }
-
-        return list.stream().map(o -> o instanceof String ? new BytesRef(o.toString()) : o).collect(Collectors.toList());
-    }
-
     // Forked from LongValuesSource.FieldValuesSource and changed getValues() method to always use sorted numeric
     // doc values, because that is what is being used in NumberFieldMapper.
     static class FieldValuesSource extends LongValuesSource {

+ 0 - 23
server/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java

@@ -38,7 +38,6 @@ import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.compress.CompressedXContent;
@@ -333,27 +332,5 @@ public class TermsSetQueryBuilderTests extends AbstractQueryTestCase<TermsSetQue
         }
     }
 
-    public void testConversion() {
-        List<Object> list = Arrays.asList();
-        assertSame(Collections.emptyList(), TermsSetQueryBuilder.convert(list));
-        assertEquals(list, TermsSetQueryBuilder.convertBack(TermsSetQueryBuilder.convert(list)));
-
-        list = Arrays.asList("abc");
-        assertEquals(Arrays.asList(new BytesRef("abc")), TermsSetQueryBuilder.convert(list));
-        assertEquals(list, TermsSetQueryBuilder.convertBack(TermsSetQueryBuilder.convert(list)));
-
-        list = Arrays.asList("abc", new BytesRef("def"));
-        assertEquals(Arrays.asList(new BytesRef("abc"), new BytesRef("def")), TermsSetQueryBuilder.convert(list));
-        assertEquals(Arrays.asList("abc", "def"), TermsSetQueryBuilder.convertBack(TermsSetQueryBuilder.convert(list)));
-
-        list = Arrays.asList(5, 42L);
-        assertEquals(Arrays.asList(5L, 42L), TermsSetQueryBuilder.convert(list));
-        assertEquals(Arrays.asList(5L, 42L), TermsSetQueryBuilder.convertBack(TermsSetQueryBuilder.convert(list)));
-
-        list = Arrays.asList(5, 42d);
-        assertEquals(Arrays.asList(5, 42d), TermsSetQueryBuilder.convert(list));
-        assertEquals(Arrays.asList(5, 42d), TermsSetQueryBuilder.convertBack(TermsSetQueryBuilder.convert(list)));
-    }
-
 }