Browse Source

Painless: add "".replaceAll and "".replaceFirst

These are useful methods in groovy that give you control over
the replacements used:
```
'the quick brown fox'.replaceAll(/[aeiou]/,
		m -> m.group().toUpperCase(Locale.ROOT))
```
Nik Everett 9 years ago
parent
commit
67bfecc070

+ 67 - 14
docs/reference/modules/scripting/painless.asciidoc

@@ -237,8 +237,8 @@ POST hockey/player/_update_by_query
 ----------------------------------------------------------------
 // CONSOLE
 
-Or you can use the `Pattern.matcher` directory to get a `Matcher` instance and
-remove all of the vowels in all of their names:
+You can use `Pattern.matcher` directly to get a `Matcher` instance and
+remove all of the vowels in all of their last names:
 
 [source,js]
 ----------------------------------------------------------------
@@ -252,6 +252,59 @@ POST hockey/player/_update_by_query
 ----------------------------------------------------------------
 // CONSOLE
 
+`Matcher.replaceAll` is just a call to Java's `Matcher`'s
+http://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.html#replaceAll-java.lang.String-[replaceAll]
+method so it supports `$1` to reference captured groups in replacements (a
+backslash only escapes the character that follows it):
+
+[source,js]
+----------------------------------------------------------------
+POST hockey/player/_update_by_query
+{
+  "script": {
+    "lang": "painless",
+    "inline": "ctx._source.last = /n([aeiou])/.matcher(ctx._source.last).replaceAll('$1')"
+  }
+}
+----------------------------------------------------------------
+// CONSOLE
+
+If you need more control over replacements you can call `replaceAll` on a
+`CharSequence` with a `Function<Matcher, String>` that builds the replacement.
+This does not support `$1` or `\1` to access captured groups because you already
+have a reference to the matcher and can get them with `m.group(1)`.
+
+IMPORTANT: Calling `Matcher.find` inside of the function that builds the
+replacement advances the matcher's state and will likely break the replacement
+process.
+
+This will make all of the vowels in the hockey players' last names upper case:
+
+[source,js]
+----------------------------------------------------------------
+POST hockey/player/_update_by_query
+{
+  "script": {
+    "lang": "painless",
+    "inline": "ctx._source.last = ctx._source.last.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"
+  }
+}
+----------------------------------------------------------------
+// CONSOLE
+
+Or you can use `CharSequence.replaceFirst` to make the first vowel in their
+last names upper case:
+
+[source,js]
+----------------------------------------------------------------
+POST hockey/player/_update_by_query
+{
+  "script": {
+    "lang": "painless",
+    "inline": "ctx._source.last = ctx._source.last.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"
+  }
+}
+----------------------------------------------------------------
+// CONSOLE
+
 
 Note: all of the `_update_by_query` examples above could really do with a
 `query` to limit the data that they pull back. While you *could* use a
@@ -265,18 +318,18 @@ documents that they have to check.
 
 The following Java packages are available for use in the Painless language:
 
-* https://docs.oracle.com/javase/8/docs/api/java/lang/package-summary.html[java.lang] 
-* https://docs.oracle.com/javase/8/docs/api/java/math/package-summary.html[java.math] 
-* https://docs.oracle.com/javase/8/docs/api/java/text/package-summary.html[java.text] 
-* https://docs.oracle.com/javase/8/docs/api/java/time/package-summary.html[java.time] 
-* https://docs.oracle.com/javase/8/docs/api/java/time/chrono/package-summary.html[java.time.chrono] 
-* https://docs.oracle.com/javase/8/docs/api/java/time/format/package-summary.html[java.time.format] 
-* https://docs.oracle.com/javase/8/docs/api/java/time/temporal/package-summary.html[java.time.temporal] 
-* https://docs.oracle.com/javase/8/docs/api/java/time/zone/package-summary.html[java.time.zone] 
-* https://docs.oracle.com/javase/8/docs/api/java/util/package-summary.html[java.util] 
-* https://docs.oracle.com/javase/8/docs/api/java/util/function/package-summary.html[java.util.function] 
-* https://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html[java.util.regex] 
-* https://docs.oracle.com/javase/8/docs/api/java/util/stream/package-summary.html[java.util.stream] 
+* https://docs.oracle.com/javase/8/docs/api/java/lang/package-summary.html[java.lang]
+* https://docs.oracle.com/javase/8/docs/api/java/math/package-summary.html[java.math]
+* https://docs.oracle.com/javase/8/docs/api/java/text/package-summary.html[java.text]
+* https://docs.oracle.com/javase/8/docs/api/java/time/package-summary.html[java.time]
+* https://docs.oracle.com/javase/8/docs/api/java/time/chrono/package-summary.html[java.time.chrono]
+* https://docs.oracle.com/javase/8/docs/api/java/time/format/package-summary.html[java.time.format]
+* https://docs.oracle.com/javase/8/docs/api/java/time/temporal/package-summary.html[java.time.temporal]
+* https://docs.oracle.com/javase/8/docs/api/java/time/zone/package-summary.html[java.time.zone]
+* https://docs.oracle.com/javase/8/docs/api/java/util/package-summary.html[java.util]
+* https://docs.oracle.com/javase/8/docs/api/java/util/function/package-summary.html[java.util.function]
+* https://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html[java.util.regex]
+* https://docs.oracle.com/javase/8/docs/api/java/util/stream/package-summary.html[java.util.stream]
 
 Note that unsafe classes and methods are not included, there is no support for:
 

+ 44 - 0
modules/lang-painless/src/main/java/org/elasticsearch/painless/Augmentation.java

@@ -34,6 +34,7 @@ import java.util.function.ObjIntConsumer;
 import java.util.function.Predicate;
 import java.util.function.ToDoubleFunction;
 import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /** Additional methods added to classes. These must be static methods with receiver as first argument */
 public class Augmentation {
@@ -442,4 +443,47 @@ public class Augmentation {
         }
         return map;
     }
+
+    // CharSequence augmentation
+    /**
+     * Replace all matches. Similar to {@link Matcher#replaceAll(String)} but allows you to customize the replacement based on the match.
+     * The string returned by {@code replacementBuilder} is passed through {@link Matcher#quoteReplacement(String)} before it is
+     * appended, so it is inserted literally.
+     *
+     * @param receiver the sequence to search
+     * @param pattern the pattern whose matches are replaced
+     * @param replacementBuilder called once per match with the matcher; returns the text that replaces that match
+     * @return the result of the replacement, or {@code receiver.toString()} if the pattern never matches
+     */
+    public static String replaceAll(CharSequence receiver, Pattern pattern, Function<Matcher, String> replacementBuilder) {
+        Matcher m = pattern.matcher(receiver);
+        if (false == m.find()) {
+            // CharSequence's toString is *supposed* to always return the characters in the sequence as a String
+            return receiver.toString();
+        }
+        StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver));
+        // The first match was already found above, so replace it before looking for the next one.
+        do {
+            m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m)));
+        } while (m.find());
+        // Copy whatever follows the last match.
+        m.appendTail(result);
+        return result.toString();
+    }
+
+    /**
+     * Replace the first match. Similar to {@link Matcher#replaceFirst(String)} but allows you to customize the replacement based on the
+     * match. The string returned by {@code replacementBuilder} is passed through {@link Matcher#quoteReplacement(String)} before it is
+     * appended, so it is inserted literally.
+     *
+     * @param receiver the sequence to search
+     * @param pattern the pattern whose first match is replaced
+     * @param replacementBuilder called with the matcher for the first match; returns the text that replaces that match
+     * @return the result of the replacement, or {@code receiver.toString()} if the pattern never matches
+     */
+    public static String replaceFirst(CharSequence receiver, Pattern pattern, Function<Matcher, String> replacementBuilder) {
+        Matcher m = pattern.matcher(receiver);
+        if (false == m.find()) {
+            // CharSequence's toString is *supposed* to always return the characters in the sequence as a String
+            return receiver.toString();
+        }
+        StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver));
+        // Only the match found above is replaced; find() is not called again.
+        m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m)));
+        // Copy everything after that match unchanged.
+        m.appendTail(result);
+        return result.toString();
+    }
+
+    /**
+     * The initial size of the {@link StringBuffer} used for {@link #replaceFirst(CharSequence, Pattern, Function)} and
+     * {@link #replaceAll(CharSequence, Pattern, Function)} for a particular sequence. We ape
+     * {@link StringBuilder#StringBuilder(CharSequence)} here and add 16 extra chars to the buffer to have a little room for growth.
+     *
+     * @param seq the sequence that is about to have replacements applied
+     * @return the length of {@code seq} plus 16
+     */
+    private static int initialBufferForReplaceWith(CharSequence seq) {
+        return seq.length() + 16;
+    }
 }

+ 2 - 0
modules/lang-painless/src/main/resources/org/elasticsearch/painless/java.lang.txt

@@ -36,6 +36,8 @@ class CharSequence -> java.lang.CharSequence {
   IntStream chars()
   IntStream codePoints()
   int length()
+  String replaceAll*(Pattern,Function)
+  String replaceFirst*(Pattern,Function)
   CharSequence subSequence(int,int)
   String toString()
 }

+ 56 - 0
modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.painless;
 
+import java.nio.CharBuffer;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.regex.Pattern;
@@ -175,6 +176,61 @@ public class RegexTests extends ScriptTestCase {
         assertEquals(Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.COMMENTS, exec("/./ciux.flags()"));
     }
 
+    /** replaceAll on a String literal upper-cases every vowel via the lambda. */
+    public void testReplaceAllMatchesString() {
+        assertEquals("thE qUIck brOwn fOx", exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"));
+    }
+
+    /** replaceAll upper-cases every vowel when the receiver is a CharBuffer passed in as a script parameter rather than a String. */
+    public void testReplaceAllMatchesCharSequence() {
+        CharSequence charSequence = CharBuffer.wrap("the quick brown fox");
+        assertEquals("thE qUIck brOwn fOx",
+                exec("params.a.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
+    }
+
+    /** replaceAll returns the input unchanged when the pattern does not match a String literal. */
+    public void testReplaceAllNoMatchString() {
+        assertEquals("i am cat", exec("'i am cat'.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))"));
+    }
+
+    /** replaceAll returns the characters of a CharBuffer receiver as a String when the pattern does not match. */
+    public void testReplaceAllNoMatchCharSequence() {
+        CharSequence charSequence = CharBuffer.wrap("i am cat");
+        assertEquals("i am cat",
+                exec("params.a.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
+    }
+
+    /** Replacements built by the lambda that contain '/' or '$' show up literally in the replaceAll result. */
+    public void testReplaceAllQuoteReplacement() {
+        assertEquals("th/E q/U/Ick br/Own f/Ox",
+                exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))"));
+        assertEquals("th$E q$U$Ick br$Own f$Ox",
+                exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '$' + m.group().toUpperCase(Locale.ROOT))"));
+    }
+
+    /** replaceFirst on a String literal upper-cases only the first vowel via the lambda. */
+    public void testReplaceFirstMatchesString() {
+        assertEquals("thE quick brown fox",
+                exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"));
+    }
+
+    /** replaceFirst upper-cases only the first vowel when the receiver is a CharBuffer passed in as a script parameter. */
+    public void testReplaceFirstMatchesCharSequence() {
+        CharSequence charSequence = CharBuffer.wrap("the quick brown fox");
+        assertEquals("thE quick brown fox",
+                exec("params.a.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
+    }
+
+    /** replaceFirst returns the input unchanged when the pattern does not match a String literal. */
+    public void testReplaceFirstNoMatchString() {
+        assertEquals("i am cat", exec("'i am cat'.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))"));
+    }
+
+    /** replaceFirst returns the characters of a CharBuffer receiver as a String when the pattern does not match. */
+    public void testReplaceFirstNoMatchCharSequence() {
+        CharSequence charSequence = CharBuffer.wrap("i am cat");
+        assertEquals("i am cat",
+                exec("params.a.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
+    }
+
+    /** Replacements built by the lambda that contain '/' or '$' show up literally in the replaceFirst result. */
+    public void testReplaceFirstQuoteReplacement() {
+        assertEquals("th/E quick brown fox",
+                exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))"));
+        assertEquals("th$E quick brown fox",
+                exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '$' + m.group().toUpperCase(Locale.ROOT))"));
+    }
+
     public void testCantUsePatternCompile() {
         IllegalArgumentException e = expectScriptThrows(IllegalArgumentException.class, () -> {
             exec("Pattern.compile('aa')");