|
@@ -7,99 +7,263 @@
|
|
|
|
|
|
package org.elasticsearch.xpack.esql.expression.function;
|
|
|
|
|
|
+import org.apache.commons.codec.EncoderException;
|
|
|
+import org.apache.commons.codec.net.PercentCodec;
|
|
|
import org.apache.lucene.util.BytesRef;
|
|
|
import org.elasticsearch.common.lucene.BytesRefs;
|
|
|
+import org.elasticsearch.core.Tuple;
|
|
|
import org.elasticsearch.xpack.esql.core.type.DataType;
|
|
|
+import org.elasticsearch.xpack.esql.expression.function.scalar.util.UrlCodecUtils;
|
|
|
|
|
|
-import java.net.URLEncoder;
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.HashSet;
|
|
|
import java.util.List;
|
|
|
import java.util.Locale;
|
|
|
+import java.util.Set;
|
|
|
import java.util.function.Supplier;
|
|
|
|
|
|
import static org.hamcrest.Matchers.equalTo;
|
|
|
+import static org.hamcrest.Matchers.is;
|
|
|
+import static org.hamcrest.Matchers.nullValue;
|
|
|
|
|
|
public abstract class AbstractUrlEncodeDecodeTestCase extends AbstractScalarFunctionTestCase {
|
|
|
|
|
|
- private record RandomUrl(String plain, String encoded) {}
|
|
|
+ private static final PercentCodec urlEncodeCodec;
|
|
|
+ private static final PercentCodec urlEncodeComponentCodec;
|
|
|
+
|
|
|
+ public enum PercentCodecTestType {
|
|
|
+ ENCODE("UrlEncodeEvaluator[val=Attribute[channel=0]]"),
|
|
|
+ ENCODE_COMPONENT("UrlEncodeComponentEvaluator[val=Attribute[channel=0]]"),
|
|
|
+ DECODE("UrlDecodeEvaluator[val=Attribute[channel=0]]");
|
|
|
+
|
|
|
+ public final String evaluatorToString;
|
|
|
+
|
|
|
+ PercentCodecTestType(String evaluatorToString) {
|
|
|
+ this.evaluatorToString = evaluatorToString;
|
|
|
+ }
|
|
|
+
|
|
|
+ public PercentCodec getCodec() {
|
|
|
+ return switch (this) {
|
|
|
+ case ENCODE -> urlEncodeCodec;
|
|
|
+ case ENCODE_COMPONENT -> urlEncodeComponentCodec;
|
|
|
|
|
|
- public static Iterable<Object[]> createParameters(boolean isEncoderTest) {
|
|
|
- String evaluatorToString = isEncoderTest
|
|
|
- ? "UrlEncodeEvaluator[val=Attribute[channel=0]]"
|
|
|
- : "UrlDecodeEvaluator[val=Attribute[channel=0]]";
|
|
|
+ // Randomized decoder tests apply a random encoder to the input to make it decodable. Fixed bad cases for the decoder skip
|
|
|
+ // this by design, in order to assert undecodable input is handled gracefully.
|
|
|
+ case DECODE -> randomBoolean() ? urlEncodeCodec : urlEncodeComponentCodec;
|
|
|
+ };
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
+ static {
|
|
|
+ // Both codecs percent-encode all characters in the input except for alphanumerics, '-', '.', '_', and '~'. The space character is a
|
|
|
+ // special case, as it can be either percent-encoded or replaced with a '+'.
|
|
|
+ // During testing, the values generated by both encoders are considered as ground truth, so the results of our implementation
|
|
|
+ // must match that.
|
|
|
+
|
|
|
+ // encodes spaces as '+'
|
|
|
+ byte[] b1 = buildUnsafeBytes(Set.of(' '));
|
|
|
+ urlEncodeCodec = new PercentCodec(b1, true);
|
|
|
+
|
|
|
+ // encodes spaces as '%20'
|
|
|
+ byte[] b2 = buildUnsafeBytes(Set.of());
|
|
|
+ urlEncodeComponentCodec = new PercentCodec(b2, false);
|
|
|
+ }
|
|
|
+
|
|
|
+ private record RandomUrl(String plain, String encoded) {}
|
|
|
+
|
|
|
+ public static Iterable<Object[]> createParameters(PercentCodecTestType codecTestType) {
|
|
|
List<TestCaseSupplier> suppliers = new ArrayList<>();
|
|
|
|
|
|
for (DataType dataType : DataType.stringTypes()) {
|
|
|
- Supplier<TestCaseSupplier.TestCase> caseSupplier = () -> createTestCaseWithRandomUrl(
|
|
|
- dataType,
|
|
|
- evaluatorToString,
|
|
|
- isEncoderTest
|
|
|
- );
|
|
|
-
|
|
|
+ // random URL tests
|
|
|
+ Supplier<TestCaseSupplier.TestCase> caseSupplier = () -> createTestCaseWithRandomUrl(dataType, codecTestType);
|
|
|
suppliers.add(new TestCaseSupplier(List.of(dataType), caseSupplier));
|
|
|
|
|
|
+ // random strings tests
|
|
|
for (TestCaseSupplier.TypedDataSupplier supplier : TestCaseSupplier.stringCases(dataType)) {
|
|
|
TestCaseSupplier testCaseSupplier = new TestCaseSupplier(
|
|
|
supplier.name(),
|
|
|
List.of(supplier.type()),
|
|
|
- () -> createTestCaseWithRandomString(dataType, evaluatorToString, isEncoderTest, supplier)
|
|
|
+ () -> createTestCaseWithRandomString(dataType, codecTestType, supplier)
|
|
|
);
|
|
|
suppliers.add(testCaseSupplier);
|
|
|
}
|
|
|
+
|
|
|
+ // fixed input tests
|
|
|
+ String[] fixedInputs = new String[] {
|
|
|
+ // all safe chars plus a space
|
|
|
+ "foo bar",
|
|
|
+
|
|
|
+ // unicode: right-to-left override (U+202E), math symbols, etc.
|
|
|
+ "ab \u202E cd \u202E ef sigma:\u2211 delta:\u2206 tunes:\u266B radioactive:\u2622 hourglass:\u23F3",
|
|
|
+
|
|
|
+ // safe and unsafe chars
|
|
|
+ "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
|
|
|
+
|
|
|
+ // all ASCII chars
|
|
|
+ new String(allAsciiChars(), StandardCharsets.UTF_8) };
|
|
|
+
|
|
|
+ for (String input : fixedInputs) {
|
|
|
+ suppliers.add(createFixedTestCase(dataType, input, codecTestType));
|
|
|
+ }
|
|
|
+
|
|
|
+ if (codecTestType == PercentCodecTestType.DECODE) {
|
|
|
+ // bad inputs for decoder tests aren't encoded first (as they wouldn't be bad then), but are expected to be handled
|
|
|
+ // gracefully by the decoder.
|
|
|
+
|
|
|
+ List<Tuple<String, String>> tuples = List.of(
|
|
|
+ // incomplete sequence
|
|
|
+ Tuple.tuple("%1", "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Incomplete trailing escape (%) pattern"),
|
|
|
+
|
|
|
+ // missing sequence
|
|
|
+ Tuple.tuple("%", "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Incomplete trailing escape (%) pattern"),
|
|
|
+
|
|
|
+ // invalid hex digits
|
|
|
+ Tuple.tuple(
|
|
|
+ "%xy",
|
|
|
+ "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - "
|
|
|
+ + "not a hexadecimal digit: \"x\" = 120"
|
|
|
+ ),
|
|
|
+
|
|
|
+ // valid and invalid sequences
|
|
|
+ Tuple.tuple(
|
|
|
+ "foo+bar%20qux%mn",
|
|
|
+ "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - "
|
|
|
+ + "not a hexadecimal digit: \"m\" = 109"
|
|
|
+ )
|
|
|
+ );
|
|
|
+
|
|
|
+ for (Tuple<String, String> t : tuples) {
|
|
|
+ String undecodableInput = t.v1();
|
|
|
+ String expectedErrorMessage = t.v2();
|
|
|
+ suppliers.add(createBadDecoderTestCase(dataType, undecodableInput, expectedErrorMessage));
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(false, suppliers);
|
|
|
|
|
|
}
|
|
|
|
|
|
- public static TestCaseSupplier.TestCase createTestCaseWithRandomUrl(
|
|
|
- DataType dataType,
|
|
|
- String evaluatorToString,
|
|
|
- boolean isEncoderTest
|
|
|
- ) {
|
|
|
- RandomUrl url = generateRandomUrl();
|
|
|
+ public static TestCaseSupplier.TestCase createTestCaseWithRandomUrl(DataType dataType, PercentCodecTestType codecTestType) {
|
|
|
+ boolean isEncoderTest = (codecTestType != PercentCodecTestType.DECODE);
|
|
|
+ RandomUrl url = generateRandomUrl(codecTestType);
|
|
|
BytesRef input = new BytesRef(isEncoderTest ? url.plain() : url.encoded());
|
|
|
BytesRef output = new BytesRef(isEncoderTest ? url.encoded() : url.plain());
|
|
|
TestCaseSupplier.TypedData fieldTypedData = new TestCaseSupplier.TypedData(input, dataType, "string");
|
|
|
|
|
|
- return new TestCaseSupplier.TestCase(List.of(fieldTypedData), evaluatorToString, dataType, equalTo(output));
|
|
|
+ return new TestCaseSupplier.TestCase(List.of(fieldTypedData), codecTestType.evaluatorToString, dataType, equalTo(output));
|
|
|
}
|
|
|
|
|
|
public static TestCaseSupplier.TestCase createTestCaseWithRandomString(
|
|
|
DataType dataType,
|
|
|
- String evaluatorToString,
|
|
|
- boolean isEncoderTest,
|
|
|
+ PercentCodecTestType codecTestType,
|
|
|
TestCaseSupplier.TypedDataSupplier supplier
|
|
|
) {
|
|
|
+ boolean isEncoderTest = (codecTestType != PercentCodecTestType.DECODE);
|
|
|
TestCaseSupplier.TypedData fieldTypedData = supplier.get();
|
|
|
String plain = BytesRefs.toBytesRef(fieldTypedData.data()).utf8ToString();
|
|
|
- String encoded = encode(plain);
|
|
|
+ String encoded = encode(plain, codecTestType);
|
|
|
BytesRef input = new BytesRef(isEncoderTest ? plain : encoded);
|
|
|
BytesRef output = new BytesRef(isEncoderTest ? encoded : plain);
|
|
|
|
|
|
return new TestCaseSupplier.TestCase(
|
|
|
List.of(new TestCaseSupplier.TypedData(input, dataType, "string")),
|
|
|
- evaluatorToString,
|
|
|
+ codecTestType.evaluatorToString,
|
|
|
dataType,
|
|
|
equalTo(output)
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- private static RandomUrl generateRandomUrl() {
|
|
|
+ private static RandomUrl generateRandomUrl(PercentCodecTestType codecTestType) {
|
|
|
String protocol = randomFrom("http://", "https://", "");
|
|
|
String domain = String.format(Locale.ROOT, "%s.com", randomAlphaOfLengthBetween(3, 10));
|
|
|
String path = randomFrom("", "/" + randomAlphanumericOfLength(5) + "/");
|
|
|
String query = randomFrom("", "?" + randomAlphaOfLength(5) + "=" + randomAlphanumericOfLength(5));
|
|
|
+ String space = " "; // ensure the correct encoding for space (+ or %20)
|
|
|
|
|
|
- String plain = String.format(Locale.ROOT, "%s%s%s%s", protocol, domain, path, query);
|
|
|
- String encoded = encode(plain);
|
|
|
+ String plain = String.format(Locale.ROOT, "%s%s%s%s%s", protocol, domain, path, query, space);
|
|
|
+ String encoded = encode(plain, codecTestType);
|
|
|
|
|
|
return new RandomUrl(plain, encoded);
|
|
|
}
|
|
|
|
|
|
- private static String encode(String plain) {
|
|
|
- return URLEncoder.encode(plain, StandardCharsets.UTF_8);
|
|
|
+ private static String encode(String plain, PercentCodecTestType codecTestType) {
|
|
|
+ byte[] plainBytes = plain.getBytes(StandardCharsets.UTF_8);
|
|
|
+ byte[] encoded = null;
|
|
|
+
|
|
|
+ try {
|
|
|
+ encoded = codecTestType.getCodec().encode(plainBytes);
|
|
|
+ } catch (EncoderException ex) {
|
|
|
+ // Checked exception isn't really thrown, but we must handle it given the signature of PercentCodec.encode().
|
|
|
+ throw new RuntimeException(ex);
|
|
|
+ }
|
|
|
+
|
|
|
+ return new String(encoded, StandardCharsets.UTF_8);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Builds the list of individual ASCII bytes that are considered unsafe and must therefore always be percent-encoded. Bytes outside the
|
|
|
+ * ASCII range are always percent-encoded by the codecs and don't need to be included in our list.
|
|
|
+ *
|
|
|
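+ * @param additionallySafe characters to treat as safe (excluded from the result) in addition to those accepted by {@code UrlCodecUtils.isRfc3986Safe}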
|
|
|
+ * @return unsafe ASCII chars
|
|
|
+ */
|
|
|
+ private static byte[] buildUnsafeBytes(final Set<Character> additionallySafe) {
|
|
|
+ Set<Byte> unsafe = new HashSet<>();
|
|
|
+
|
|
|
+ for (int i = 0; i <= Byte.MAX_VALUE; ++i) {
|
|
|
+ char c = (char) i;
|
|
|
+ if (additionallySafe.contains(c) == false && UrlCodecUtils.isRfc3986Safe(c) == false) {
|
|
|
+ unsafe.add((byte) i);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ byte[] bytes = new byte[unsafe.size()];
|
|
|
+
|
|
|
+ int i = 0;
|
|
|
+ for (byte b : unsafe) {
|
|
|
+ bytes[i++] = b;
|
|
|
+ }
|
|
|
+
|
|
|
+ return bytes;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static TestCaseSupplier createFixedTestCase(DataType dataType, String plain, PercentCodecTestType codecTestType) {
|
|
|
+ return new TestCaseSupplier(List.of(dataType), () -> {
|
|
|
+ boolean isEncoderTest = (codecTestType != PercentCodecTestType.DECODE);
|
|
|
+ String encoded = encode(plain, codecTestType);
|
|
|
+ String input = (isEncoderTest) ? plain : encoded;
|
|
|
+ String output = isEncoderTest ? encoded : plain;
|
|
|
+
|
|
|
+ return new TestCaseSupplier.TestCase(
|
|
|
+ List.of(new TestCaseSupplier.TypedData(new BytesRef(input), dataType, "string")),
|
|
|
+ codecTestType.evaluatorToString,
|
|
|
+ dataType,
|
|
|
+ equalTo(new BytesRef(output))
|
|
|
+ );
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ private static TestCaseSupplier createBadDecoderTestCase(DataType dataType, String undecodable, String exceptionMessage) {
|
|
|
+ return new TestCaseSupplier(
|
|
|
+ List.of(dataType),
|
|
|
+ () -> new TestCaseSupplier.TestCase(
|
|
|
+ List.of(new TestCaseSupplier.TypedData(new BytesRef(undecodable), dataType, "string")),
|
|
|
+ PercentCodecTestType.DECODE.evaluatorToString,
|
|
|
+ dataType,
|
|
|
+ is(nullValue())
|
|
|
+ ).withWarning("Line 1:1: evaluation of [source] failed, treating result as null. Only first 20 failures recorded.")
|
|
|
+ .withWarning(exceptionMessage)
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ private static byte[] allAsciiChars() {
|
|
|
+ byte[] bytes = new byte[Byte.MAX_VALUE + 1];
|
|
|
+ for (int i = 0; i < bytes.length; ++i) {
|
|
|
+ bytes[i] = (byte) i;
|
|
|
+ }
|
|
|
+ return bytes;
|
|
|
}
|
|
|
}
|