9 years ago · 1e16c22d03
--- a/modules/lang-painless/src/main/antlr/PainlessLexer.g4
+++ b/modules/lang-painless/src/main/antlr/PainlessLexer.g4
@@ -56,7 +56,7 @@ THIS:      'this';
 
				 BOOLNOT: '!';
			
 
				 BWNOT:   '~';
			
 
				 MUL:     '*';
			
 
				-DIV:     '/' { false == SlashStrategy.slashIsRegex(_factory) }?;
			
 
				+DIV:     '/' { false == SlashStrategy.slashIsRegex(this) }?;
			
 
				 REM:     '%';
			
 
				 ADD:     '+';
			
 
				 SUB:     '-';
			
@@ -104,7 +104,7 @@ INTEGER: ( '0' | [1-9] [0-9]* ) [lLfFdD]?;
 
				 DECIMAL: ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? ( [eE] [+\-]? [0-9]+ )? [fF]?;
			
 
				 
			
 
				 STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\"] )*? '\'' );
			
 
				-REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { SlashStrategy.slashIsRegex(_factory) }?;
			
 
				+REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { SlashStrategy.slashIsRegex(this) }?;
			
 
				 
			
 
				 TRUE:  'true';
			
 
				 FALSE: 'false';
			
--- a/modules/lang-painless/src/main/antlr/PainlessParser.g4
+++ b/modules/lang-painless/src/main/antlr/PainlessParser.g4
@@ -92,17 +92,6 @@ trap
 
				 delimiter
			
 
				     : SEMICOLON
			
 
				     | EOF
			
 
				-    // RBRACK is a delimiter but we don't consume it because it is only valid
			
 
				-    // in places where RBRACK can follow the statement. It is simpler to not
			
 
				-    // consume it here then it is to consume it here. Unfortunately, they
			
 
				-    // obvious syntax to do this `| { _input.LA(1) == RBRACK }?` generates an
			
 
				-    // amazingly intense `adaptivePredict` call that doesn't actually work
			
 
				-    // and builds a serious DFA. Huge. So instead we use standard ANTLR syntax
			
 
				-    // to consume the token and then undo the consumption. This looks hairy but
			
 
				-    // it is better than the alternatives.
			
 
				-    |   { int mark = _input.mark(); int index = _input.index(); }
			
 
				-            RBRACK
			
 
				-        { _input.seek(index); _input.release(mark); }
			
 
				     ;
			
 
				 
			
 
				 // Note we return the boolean s.  This is returned as true
			
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/EnhancedPainlessLexer.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/EnhancedPainlessLexer.java
@@ -0,0 +1,96 @@
 
				+/*
			
 
				+ * Licensed to Elasticsearch under one or more contributor
			
 
				+ * license agreements. See the NOTICE file distributed with
			
 
				+ * this work for additional information regarding copyright
			
 
				+ * ownership. Elasticsearch licenses this file to you under
			
 
				+ * the Apache License, Version 2.0 (the "License"); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing,
			
 
				+ * software distributed under the License is distributed on an
			
 
				+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
			
 
				+ * KIND, either express or implied.  See the License for the
			
 
				+ * specific language governing permissions and limitations
			
 
				+ * under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.elasticsearch.painless.antlr;
			
 
				+
			
 
				+import org.antlr.v4.runtime.CharStream;
			
 
				+import org.antlr.v4.runtime.Lexer;
			
 
				+import org.antlr.v4.runtime.LexerNoViableAltException;
			
 
				+import org.antlr.v4.runtime.Token;
			
 
				+import org.antlr.v4.runtime.TokenSource;
			
 
				+import org.antlr.v4.runtime.misc.Interval;
			
 
				+import org.antlr.v4.runtime.misc.Pair;
			
 
				+import org.elasticsearch.painless.Location;
			
 
				+
			
 
				+/**
			
 
				+ * A lexer that is customized for painless. It will:
			
 
				+ * <ul>
			
 
				+ * <li>override the default error behavior to fail on the first error
			
 
				+ * <li>store the last token in case we need to do lookbehind for semicolon insertion and regex vs division detection
			
 
				+ * <li>insert semicolons where they'd improve the language's readability. Rather than hack this into the parser and create a ton of
			
 
				+ * ambiguity we hack them here where we can use heuristics to do it quickly.
			
 
				+ * </ul>
			
 
				+ */
			
 
				+final class EnhancedPainlessLexer extends PainlessLexer {
			
 
				+    final String sourceName;
			
 
				+    private Token stashedNext = null;
			
 
				+    private Token previous = null;
			
 
				+
			
 
				+    EnhancedPainlessLexer(CharStream charStream, String sourceName) {
			
 
				+        super(charStream);
			
 
				+        this.sourceName = sourceName;
			
 
				+    }
			
 
				+
			
 
				+    public Token getPreviousToken() {
			
 
				+        return previous;
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    public Token nextToken() {
			
 
				+        if (stashedNext != null) {
			
 
				+            previous = stashedNext;
			
 
				+            stashedNext = null;
			
 
				+            return previous;
			
 
				+        }
			
 
				+        Token next = super.nextToken();
			
 
				+        if (insertSemicolon(previous, next)) {
			
 
				+            stashedNext = next;
			
 
				+            previous = _factory.create(new Pair<TokenSource, CharStream>(this, _input), PainlessLexer.SEMICOLON, ";",
			
 
				+                    Lexer.DEFAULT_TOKEN_CHANNEL, next.getStartIndex(), next.getStopIndex(), next.getLine(), next.getCharPositionInLine());
			
 
				+            return previous;
			
 
				+        } else {
			
 
				+            previous = next;
			
 
				+            return next;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    public void recover(final LexerNoViableAltException lnvae) {
			
 
				+        final CharStream charStream = lnvae.getInputStream();
			
 
				+        final int startIndex = lnvae.getStartIndex();
			
 
				+        final String text = charStream.getText(Interval.of(startIndex, charStream.index()));
			
 
				+
			
 
				+        Location location = new Location(sourceName, _tokenStartCharIndex);
			
 
				+        throw location.createError(new IllegalArgumentException("unexpected character [" + getErrorDisplay(text) + "].", lnvae));
			
 
				+    }
			
 
				+
			
 
				+    private static boolean insertSemicolon(Token previous, Token next) {
			
 
				+        if (previous == null || next.getType() != PainlessLexer.RBRACK) {
			
 
				+            return false;
			
 
				+        }
			
 
				+        switch (previous.getType()) {
			
 
				+        case PainlessLexer.RBRACK:     // };} would be weird!
			
 
				+        case PainlessLexer.SEMICOLON:  // already have a semicolon, no need to add one
			
 
				+        case PainlessLexer.LBRACK:     // empty blocks don't need a semicolon
			
 
				+            return false;
			
 
				+        default:
			
 
				+            return true;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java
@@ -140,14 +140,14 @@ class PainlessLexer extends Lexer {
 
				   private boolean DIV_sempred(RuleContext _localctx, int predIndex) {
			
 
				     switch (predIndex) {
			
 
				     case 0:
			
 
				-      return  false == SlashStrategy.slashIsRegex(_factory) ;
			
 
				+      return  false == SlashStrategy.slashIsRegex(this) ;
			
 
				     }
			
 
				     return true;
			
 
				   }
			
 
				   private boolean REGEX_sempred(RuleContext _localctx, int predIndex) {
			
 
				     switch (predIndex) {
			
 
				     case 1:
			
 
				-      return  SlashStrategy.slashIsRegex(_factory) ;
			
 
				+      return  SlashStrategy.slashIsRegex(this) ;
			
 
				     }
			
 
				     return true;
			
 
				   }
			
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessParser.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessParser.java
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/SlashStrategy.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/SlashStrategy.java
@@ -20,15 +20,14 @@
 
				 package org.elasticsearch.painless.antlr;
			
 
				 
			
 
				 import org.antlr.v4.runtime.Token;
			
 
				-import org.antlr.v4.runtime.TokenFactory;
			
 
				 
			
 
				 /**
			
 
				  * Utility to figure out if a {@code /} is division or the start of a regex literal.
			
 
				  */
			
 
				 public class SlashStrategy {
			
 
				-    public static boolean slashIsRegex(TokenFactory<?> factory) {
			
 
				-        StashingTokenFactory<?> stashingFactory = (StashingTokenFactory<?>) factory;
			
 
				-        Token lastToken = stashingFactory.getLastToken();
			
 
				+    public static boolean slashIsRegex(PainlessLexer lexer) {
			
 
				+        EnhancedPainlessLexer realLexer = (EnhancedPainlessLexer) lexer;
			
 
				+        Token lastToken = realLexer.getPreviousToken();
			
 
				         if (lastToken == null) {
			
 
				             return true;
			
 
				         }
			
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java
@@ -183,7 +183,7 @@ public final class Walker extends PainlessParserBaseVisitor<Object> {
 
				 
			
 
				     private SourceContext buildAntlrTree(String source) {
			
 
				         ANTLRInputStream stream = new ANTLRInputStream(source);
			
 
				-        PainlessLexer lexer = new ErrorHandlingLexer(stream, sourceName);
			
 
				+        PainlessLexer lexer = new EnhancedPainlessLexer(stream, sourceName);
			
 
				         PainlessParser parser = new PainlessParser(new CommonTokenStream(lexer));
			
 
				         ParserErrorStrategy strategy = new ParserErrorStrategy(sourceName);
			
 
				 
			
--- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/LambdaTests.java
+++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/LambdaTests.java
@@ -79,7 +79,11 @@ public class LambdaTests extends ScriptTestCase {
 
				     }
			
 
				 
			
 
				     public void testMultipleStatements() {
			
 
				-        assertEquals(2, exec("int applyOne(IntFunction arg) { arg.apply(1) } applyOne(x -> { x = x + 1; return x;})"));
			
 
				+        assertEquals(2, exec("int applyOne(IntFunction arg) { arg.apply(1) } applyOne(x -> { x = x + 1; return x })"));
			
 
				+    }
			
 
				+
			
 
				+    public void testUnneededCurlyStatements() {
			
 
				+        assertEquals(2, exec("int applyOne(IntFunction arg) { arg.apply(1) } applyOne(x -> { x + 1 })"));
			
 
				     }
			
 
				 
			
 
				     public void testTwoLambdas() {
			
--- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java
+++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java
@@ -201,6 +201,6 @@ public class RegexTests extends ScriptTestCase {
 
				         IllegalArgumentException e = expectScriptThrows(IllegalArgumentException.class, () -> {
			
 
				             exec("/asdf/b", emptyMap(), emptyMap(), null); // Not picky so we get a non-assertion error
			
 
				         });
			
 
				-        assertEquals("invalid sequence of tokens near ['b'].", e.getMessage());
			
 
				+        assertEquals("unexpected token ['b'] was expecting one of [{<EOF>, ';'}].", e.getMessage());
			
 
				     }
			
 
				 }
			
--- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/antlr/ParserTests.java
+++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/antlr/ParserTests.java
@@ -40,7 +40,7 @@ public class ParserTests extends ScriptTestCase {
 
				 
			
 
				     private SourceContext buildAntlrTree(String source) {
			
 
				         ANTLRInputStream stream = new ANTLRInputStream(source);
			
 
				-        PainlessLexer lexer = new ErrorHandlingLexer(stream, "testing");
			
 
				+        PainlessLexer lexer = new EnhancedPainlessLexer(stream, "testing");
			
 
				         PainlessParser parser = new PainlessParser(new CommonTokenStream(lexer));
			
 
				         ParserErrorStrategy strategy = new ParserErrorStrategy("testing");