Mappings: Add transform to document before indexing.

Closes #6566
Nik Everett · 11 years ago
commit da5fb34163

+ 1 - 0
docs/reference/mapping.asciidoc

@@ -75,3 +75,4 @@ include::mapping/conf-mappings.asciidoc[]
 
 include::mapping/meta.asciidoc[]
 
+include::mapping/transform.asciidoc[]

+ 61 - 0
docs/reference/mapping/transform.asciidoc

@@ -0,0 +1,61 @@
+[[mapping-transform]]
+== Transform
+coming[1.3.0]
+
+The document can be transformed before it is indexed by registering a
+script in the `transform` element of the mapping.  The result of the
+transform is indexed but the original source is stored in the `_source`
+field.  Example:
+
+[source,js]
+--------------------------------------------------
+{
+    "example" : {
+        "transform" : {
+            "script" : "if (ctx._source['title']?.startsWith('t')) ctx._source['suggest'] = ctx._source['content']",
+            "params" : {
+                "variable" : "not used but an example anyway"
+            },
+            "lang": "groovy"
+        },
+        "properties": {
+           "title": { "type": "string" },
+           "content": { "type": "string" },
+           "suggest": { "type": "string" }
+        }
+    }
+}
+--------------------------------------------------
+
+It's also possible to specify multiple transforms:
+[source,js]
+--------------------------------------------------
+{
+    "example" : {
+        "transform" : [
+            {"script": "ctx._source['suggest'] = ctx._source['content']"}
+            {"script": "ctx._source['foo'] = ctx._source['bar'];"}
+        ]
+    }
+}
+--------------------------------------------------
+
+Because the result of the transform isn't stored in the source it can't normally
+be fetched by source filtering.  It can be highlighted if the field is marked as stored.
+
+=== Get Transformed
+The get endpoint will retransform the source if the `_source_transform`
+parameter is set.  Example:
+
+[source,bash]
+--------------------------------------------------
+curl -XGET "http://localhost:9200/test/example/3?pretty&_source_transform"
+--------------------------------------------------
+
+The transform is performed before any source filtering.  It is mostly designed
+to make it easy to see, for debugging, exactly what was sent to be indexed.
+
+=== Immutable Transformation
+Once configured, the transform script cannot be modified.  This is not
+because it would be technically impossible, but because madness lies
+down that road.
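
As an illustration of the indexed-versus-stored split described above, a search can match on a field that only exists after the transform while the returned `_source` stays untransformed. A sketch against the Java API (assuming an `org.elasticsearch.client.Client` named `client`; the index and field names follow this commit's integration test):

[source,java]
--------------------------------------------------
// "destination" exists only in the indexed document (it was created by the transform),
// so the query matches, yet the hit's _source is still the original, untransformed document.
SearchResponse response = client.prepareSearch("test")
        .setQuery(QueryBuilders.termQuery("destination", "findme"))
        .get();
Map<String, Object> source = response.getHits().getAt(0).sourceAsMap();
--------------------------------------------------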

+ 20 - 5
src/main/java/org/elasticsearch/action/get/GetRequestBuilder.java

@@ -23,8 +23,8 @@ import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.single.shard.SingleShardOperationRequestBuilder;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.common.Nullable;
-import org.elasticsearch.index.VersionType;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.index.VersionType;
 import org.elasticsearch.search.fetch.source.FetchSourceContext;
 
 /**
@@ -95,10 +95,8 @@ public class GetRequestBuilder extends SingleShardOperationRequestBuilder<GetReq
     }
 
     /**
-     * Indicates whether the response should contain the stored _source
-     *
-     * @param fetch
-     * @return
+     * Indicates whether the response should contain the stored _source.
+     * @return this for chaining
      */
     public GetRequestBuilder setFetchSource(boolean fetch) {
         FetchSourceContext context = request.fetchSourceContext();
@@ -111,6 +109,23 @@ public class GetRequestBuilder extends SingleShardOperationRequestBuilder<GetReq
         return this;
     }
 
+    /**
+     * Should the source be transformed using the script used at index time
+     * (if any)? Note that calling this without having called setFetchSource
+     * will automatically turn on source fetching.
+     *
+     * @return this for chaining
+     */
+    public GetRequestBuilder setTransformSource(boolean transform) {
+        FetchSourceContext context = request.fetchSourceContext();
+        if (context == null) {
+            context = new FetchSourceContext(true);
+            request.fetchSourceContext(context);
+        }
+        context.transformSource(transform);
+        return this;
+    }
+
     /**
      * Indicate that _source should be returned, with an "include" and/or "exclude" set which can include simple wildcard
      * elements.
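
For reference, a minimal sketch of the new method in use (index, type and id are placeholders); as the javadoc for `setTransformSource` notes, calling it without `setFetchSource` implicitly turns source fetching on:

[source,java]
--------------------------------------------------
// Ask the get to re-run the mapping's transform script on the stored _source.
GetResponse response = client.prepareGet("test", "example", "1")
        .setTransformSource(true)
        .get();
Map<String, Object> transformedSource = response.getSource();
--------------------------------------------------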

+ 1 - 1
src/main/java/org/elasticsearch/cluster/metadata/MappingMetaData.java

@@ -43,7 +43,7 @@ import java.util.Map;
 import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeBooleanValue;
 
 /**
- *
+ * Mapping configuration for a type.
  */
 public class MappingMetaData {
 

+ 22 - 22
src/main/java/org/elasticsearch/index/get/ShardGetService.java

@@ -50,10 +50,8 @@ import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.index.shard.service.IndexShard;
 import org.elasticsearch.index.translog.Translog;
 import org.elasticsearch.script.ScriptService;
-import org.elasticsearch.script.SearchScript;
 import org.elasticsearch.search.fetch.source.FetchSourceContext;
 import org.elasticsearch.search.lookup.SearchLookup;
-import org.elasticsearch.search.lookup.SourceLookup;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -218,7 +216,6 @@ public class ShardGetService extends AbstractIndexShardComponent {
 
                 // we can only load scripts that can run against the source
                 if (gFields != null && gFields.length > 0) {
-                    Map<String, Object> sourceAsMap = null;
                     for (String field : gFields) {
                         if (SourceFieldMapper.NAME.equals(field)) {
                             // dealt with when normalizing fetchSourceContext.
@@ -284,25 +281,24 @@ public class ShardGetService extends AbstractIndexShardComponent {
                     // We must first apply the field mapper filtering to make sure we get correct results
                     // in the case that the fetchSourceContext white lists something that's not included by the field mapper
 
-                    Map<String, Object> filteredSource = null;
-                    XContentType sourceContentType = null;
-                    if (sourceFieldMapper.includes().length > 0 || sourceFieldMapper.excludes().length > 0) {
+                    boolean sourceFieldFiltering = sourceFieldMapper.includes().length > 0 || sourceFieldMapper.excludes().length > 0;
+                    boolean sourceFetchFiltering = fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0;
+                    if (fetchSourceContext.transformSource() || sourceFieldFiltering || sourceFetchFiltering) {
                         // TODO: The source might parsed and available in the sourceLookup but that one uses unordered maps so different. Do we care?
                         Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source.source, true);
-                        sourceContentType = typeMapTuple.v1();
-                        filteredSource = XContentMapValues.filter(typeMapTuple.v2(), sourceFieldMapper.includes(), sourceFieldMapper.excludes());
-                    }
-                    if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
-                        if (filteredSource == null) {
-                            Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source.source, true);
-                            sourceContentType = typeMapTuple.v1();
-                            filteredSource = typeMapTuple.v2();
+                        XContentType sourceContentType = typeMapTuple.v1();
+                        Map<String, Object> sourceAsMap = typeMapTuple.v2();
+                        if (fetchSourceContext.transformSource()) {
+                            sourceAsMap = docMapper.transformSourceAsMap(sourceAsMap);
+                        }
+                        if (sourceFieldFiltering) {
+                            sourceAsMap = XContentMapValues.filter(sourceAsMap, sourceFieldMapper.includes(), sourceFieldMapper.excludes());
+                        }
+                        if (sourceFetchFiltering) {
+                            sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
                         }
-                        filteredSource = XContentMapValues.filter(filteredSource, fetchSourceContext.includes(), fetchSourceContext.excludes());
-                    }
-                    if (filteredSource != null) {
                         try {
-                            sourceToBeReturned = XContentFactory.contentBuilder(sourceContentType).map(filteredSource).bytes();
+                            sourceToBeReturned = XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap).bytes();
                         } catch (IOException e) {
                             throw new ElasticsearchException("Failed to get type [" + type + "] and id [" + id + "] with includes/excludes set", e);
                         }
@@ -381,15 +377,19 @@ public class ShardGetService extends AbstractIndexShardComponent {
 
         if (!fetchSourceContext.fetchSource()) {
             source = null;
-        } else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
-            Map<String, Object> filteredSource;
+        } else if (fetchSourceContext.transformSource() || fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
+            Map<String, Object> sourceAsMap;
             XContentType sourceContentType = null;
             // TODO: The source might parsed and available in the sourceLookup but that one uses unordered maps so different. Do we care?
             Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
             sourceContentType = typeMapTuple.v1();
-            filteredSource = XContentMapValues.filter(typeMapTuple.v2(), fetchSourceContext.includes(), fetchSourceContext.excludes());
+            sourceAsMap = typeMapTuple.v2();
+            if (fetchSourceContext.transformSource()) {
+                sourceAsMap = docMapper.transformSourceAsMap(sourceAsMap);
+            }
+            sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
             try {
-                source = XContentFactory.contentBuilder(sourceContentType).map(filteredSource).bytes();
+                source = XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap).bytes();
             } catch (IOException e) {
                 throw new ElasticsearchException("Failed to get type [" + type + "] and id [" + id + "] with includes/excludes set", e);
             }

+ 128 - 3
src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java

@@ -29,6 +29,7 @@ import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.elasticsearch.ElasticsearchGenerationException;
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.Booleans;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Preconditions;
@@ -41,10 +42,14 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.text.StringAndBytesText;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.common.xcontent.*;
+import org.elasticsearch.common.xcontent.smile.SmileXContent;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.mapper.internal.*;
 import org.elasticsearch.index.mapper.object.ObjectMapper;
 import org.elasticsearch.index.mapper.object.RootObjectMapper;
+import org.elasticsearch.script.ExecutableScript;
+import org.elasticsearch.script.ScriptService;
+import org.elasticsearch.script.ScriptService.ScriptType;
 
 import java.io.IOException;
 import java.util.*;
@@ -139,6 +144,8 @@ public class DocumentMapper implements ToXContent {
 
         private NamedAnalyzer searchQuoteAnalyzer;
 
+        private List<SourceTransform> sourceTransforms;
+
         private final String index;
 
         @Nullable
@@ -225,11 +232,18 @@ public class DocumentMapper implements ToXContent {
             return searchQuoteAnalyzer != null;
         }
 
+        public Builder transform(ScriptService scriptService, String script, ScriptType scriptType, String language, Map<String, Object> parameters) {
+            if (sourceTransforms == null) {
+                sourceTransforms = new ArrayList<>();
+            }
+            sourceTransforms.add(new ScriptTransform(scriptService, script, scriptType, language, parameters));
+            return this;
+        }
+
         public DocumentMapper build(DocumentMapperParser docMapperParser) {
             Preconditions.checkNotNull(rootObjectMapper, "Mapper builder must have the root object mapper set");
             return new DocumentMapper(index, indexSettings, docMapperParser, rootObjectMapper, meta,
-                    indexAnalyzer, searchAnalyzer, searchQuoteAnalyzer,
-                    rootMappers);
+                    indexAnalyzer, searchAnalyzer, searchQuoteAnalyzer, rootMappers, sourceTransforms);
         }
     }
 
@@ -281,11 +295,13 @@ public class DocumentMapper implements ToXContent {
 
     private final Object mappersMutex = new Object();
 
+    private final List<SourceTransform> sourceTransforms;
+
     public DocumentMapper(String index, @Nullable Settings indexSettings, DocumentMapperParser docMapperParser,
                           RootObjectMapper rootObjectMapper,
                           ImmutableMap<String, Object> meta,
                           NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer, NamedAnalyzer searchQuoteAnalyzer,
-                          Map<Class<? extends RootMapper>, RootMapper> rootMappers) {
+                          Map<Class<? extends RootMapper>, RootMapper> rootMappers, List<SourceTransform> sourceTransforms) {
         this.index = index;
         this.indexSettings = indexSettings;
         this.type = rootObjectMapper.name();
@@ -293,6 +309,7 @@ public class DocumentMapper implements ToXContent {
         this.docMapperParser = docMapperParser;
         this.meta = meta;
         this.rootObjectMapper = rootObjectMapper;
+        this.sourceTransforms = sourceTransforms;
 
         this.rootMappers = ImmutableMap.copyOf(rootMappers);
         this.rootMappersOrdered = rootMappers.values().toArray(new RootMapper[rootMappers.values().size()]);
@@ -479,6 +496,9 @@ public class DocumentMapper implements ToXContent {
             if (parser == null) {
                 parser = XContentHelper.createParser(source.source());
             }
+            if (sourceTransforms != null) {
+                parser = transform(parser);
+            }
             context.reset(parser, new ParseContext.Document(), source, listener);
 
             // will result in START_OBJECT
@@ -566,6 +586,28 @@ public class DocumentMapper implements ToXContent {
         return doc;
     }
 
+    /**
+     * Transform the source when it is expressed as a map.  This is public so it can also be applied when the source is loaded.
+     * @param sourceAsMap source to transform.  This may be mutated by the script.
+     * @return the transformed version of sourceAsMap.  This may actually be the same object as sourceAsMap
+     */
+    public Map<String, Object> transformSourceAsMap(Map<String, Object> sourceAsMap) {
+        if (sourceTransforms == null) {
+            return sourceAsMap;
+        }
+        for (SourceTransform transform : sourceTransforms) {
+            sourceAsMap = transform.transformSourceAsMap(sourceAsMap);
+        }
+        return sourceAsMap;
+    }
+
+    private XContentParser transform(XContentParser parser) throws IOException {
+        Map<String, Object> transformed = transformSourceAsMap(parser.mapOrderedAndClose());
+        // TODO it'd be nice to have a MapXContent or something that could spit out the parser for this map
+        XContentBuilder builder = XContentFactory.smileBuilder().value(transformed);
+        return SmileXContent.smileXContent.createParser(builder.bytes());
+    }
+
     public void addFieldMappers(List<FieldMapper> fieldMappers) {
         synchronized (mappersMutex) {
             this.fieldMappers.addNewMappers(fieldMappers);
@@ -691,6 +733,18 @@ public class DocumentMapper implements ToXContent {
                         }
                     }
                 }
+                if (sourceTransforms != null) {
+                    if (sourceTransforms.size() == 1) {
+                        builder.field("transform");
+                        sourceTransforms.get(0).toXContent(builder, params);
+                    } else {
+                        builder.startArray("transform");
+                        for (SourceTransform transform: sourceTransforms) {
+                            transform.toXContent(builder, params);
+                        }
+                        builder.endArray();
+                    }
+                }
 
                 if (meta != null && !meta.isEmpty()) {
                     builder.field("_meta", meta());
@@ -702,4 +756,75 @@ public class DocumentMapper implements ToXContent {
         }, rootMappersNotIncludedInObject);
         return builder;
     }
+
+    /**
+     * Transformations to be applied to the source before indexing and/or after loading.
+     */
+    private interface SourceTransform extends ToXContent {
+        /**
+         * Transform the source when it is expressed as a map.  This is used both at index time and when the transformed source is fetched.
+         * @param sourceAsMap source to transform.  This may be mutated by the script.
+         * @return the transformed version of sourceAsMap.  This may actually be the same object as sourceAsMap
+         */
+        Map<String, Object> transformSourceAsMap(Map<String, Object> sourceAsMap);
+    }
+
+    /**
+     * Script based source transformation.
+     */
+    private static class ScriptTransform implements SourceTransform {
+        private final ScriptService scriptService;
+        /**
+         * Contents of the script to transform the source document before indexing.
+         */
+        private final String script;
+        /**
+         * The type of the script to run.
+         */
+        private final ScriptType scriptType;
+        /**
+         * Language of the script to transform the source document before indexing.
+         */
+        private final String language;
+        /**
+         * Parameters passed to the transform script.
+         */
+        private final Map<String, Object> parameters;
+
+        public ScriptTransform(ScriptService scriptService, String script, ScriptType scriptType, String language, Map<String, Object> parameters) {
+            this.scriptService = scriptService;
+            this.script = script;
+            this.scriptType = scriptType;
+            this.language = language;
+            this.parameters = parameters;
+        }
+
+        @SuppressWarnings("unchecked")
+        public Map<String, Object> transformSourceAsMap(Map<String, Object> sourceAsMap) {
+            try {
+                // We use the ctx variable and the _source name to be consistent with the update api.
+                ExecutableScript executable = scriptService.executable(language, script, scriptType, parameters);
+                Map<String, Object> ctx = new HashMap<>(1);
+                ctx.put("_source", sourceAsMap);
+                executable.setNextVar("ctx", ctx);
+                executable.run();
+                ctx = (Map<String, Object>) executable.unwrap(ctx);
+                return (Map<String, Object>) ctx.get("_source");
+            } catch (Exception e) {
+                throw new ElasticsearchIllegalArgumentException("failed to execute script", e);
+            }
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.startObject();
+            builder.field("script", script);
+            builder.field("lang", language);
+            if (parameters != null) {
+                builder.field("params", parameters);
+            }
+            builder.endObject();
+            return builder;
+        }
+    }
 }
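
The script is handed the document as `ctx._source`, mirroring the update API, and whatever ends up under `ctx._source` afterwards is what gets indexed. A minimal plain-Java sketch of that contract, hand-coding the Groovy example from the docs (this class is illustrative only, not part of the commit):

[source,java]
--------------------------------------------------
import java.util.HashMap;
import java.util.Map;

// Hypothetical stand-in for a transform script: copy "content" into "suggest"
// when "title" starts with 't'. In the real code this is done by ScriptTransform.
final class ExampleTransform {
    @SuppressWarnings("unchecked")
    static Map<String, Object> transformSourceAsMap(Map<String, Object> sourceAsMap) {
        Map<String, Object> ctx = new HashMap<>(1);
        ctx.put("_source", sourceAsMap);               // same "ctx" variable a script sees
        Map<String, Object> source = (Map<String, Object>) ctx.get("_source");
        Object title = source.get("title");
        if (title instanceof String && ((String) title).startsWith("t")) {
            source.put("suggest", source.get("content"));
        }
        return source;                                 // this map is what gets indexed
    }
}
--------------------------------------------------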

+ 51 - 7
src/main/java/org/elasticsearch/index/mapper/DocumentMapperParser.java

@@ -46,8 +46,11 @@ import org.elasticsearch.index.mapper.object.ObjectMapper;
 import org.elasticsearch.index.mapper.object.RootObjectMapper;
 import org.elasticsearch.index.settings.IndexSettings;
 import org.elasticsearch.index.similarity.SimilarityLookupService;
+import org.elasticsearch.script.ScriptService;
+import org.elasticsearch.script.ScriptService.ScriptType;
 
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 import static org.elasticsearch.index.mapper.MapperBuilders.doc;
@@ -61,6 +64,7 @@ public class DocumentMapperParser extends AbstractIndexComponent {
     private final PostingsFormatService postingsFormatService;
     private final DocValuesFormatService docValuesFormatService;
     private final SimilarityLookupService similarityLookupService;
+    private final ScriptService scriptService;
 
     private final RootObjectMapper.TypeParser rootObjectTypeParser = new RootObjectMapper.TypeParser();
 
@@ -72,12 +76,13 @@ public class DocumentMapperParser extends AbstractIndexComponent {
 
     public DocumentMapperParser(Index index, @IndexSettings Settings indexSettings, AnalysisService analysisService,
                                 PostingsFormatService postingsFormatService, DocValuesFormatService docValuesFormatService,
-                                SimilarityLookupService similarityLookupService) {
+                                SimilarityLookupService similarityLookupService, ScriptService scriptService) {
         super(index, indexSettings);
         this.analysisService = analysisService;
         this.postingsFormatService = postingsFormatService;
         this.docValuesFormatService = docValuesFormatService;
         this.similarityLookupService = similarityLookupService;
+        this.scriptService = scriptService;
         MapBuilder<String, Mapper.TypeParser> typeParsersBuilder = new MapBuilder<String, Mapper.TypeParser>()
                 .put(ByteFieldMapper.CONTENT_TYPE, new ByteFieldMapper.TypeParser())
                 .put(ShortFieldMapper.CONTENT_TYPE, new ShortFieldMapper.TypeParser())
@@ -238,6 +243,20 @@ public class DocumentMapperParser extends AbstractIndexComponent {
                 }
                 docBuilder.indexAnalyzer(analyzer);
                 docBuilder.searchAnalyzer(analyzer);
+            } else if ("transform".equals(fieldName)) {
+                iterator.remove();
+                if (fieldNode instanceof Map) {
+                    parseTransform(docBuilder, (Map<String, Object>) fieldNode);
+                } else if (fieldNode instanceof List) {
+                    for (Object transformItem: (List)fieldNode) {
+                        if (!(transformItem instanceof Map)) {
+                            throw new MapperParsingException("Elements of transform list must be objects but one was:  " + fieldNode);
+                        }
+                        parseTransform(docBuilder, (Map<String, Object>) transformItem);
+                    }
+                } else {
+                    throw new MapperParsingException("Transform must be an object or an array but was:  " + fieldNode);
+                }
             } else {
                 Mapper.TypeParser typeParser = rootTypeParsers.get(fieldName);
                 if (typeParser != null) {
@@ -254,11 +273,7 @@ public class DocumentMapperParser extends AbstractIndexComponent {
         docBuilder.meta(attributes);
 
         if (!mapping.isEmpty()) {
-            StringBuilder remainingFields = new StringBuilder();
-            for (String key : mapping.keySet()) {
-                remainingFields.append(" [").append(key).append(" : ").append(mapping.get(key).toString()).append("]");
-            }
-            throw new MapperParsingException("Root type mapping not empty after parsing! Remaining fields:" + remainingFields.toString());
+            throw new MapperParsingException("Root type mapping not empty after parsing! Remaining fields:  " + getRemainingFields(mapping));
         }
         if (!docBuilder.hasIndexAnalyzer()) {
             docBuilder.indexAnalyzer(analysisService.defaultIndexAnalyzer());
@@ -276,7 +291,36 @@ public class DocumentMapperParser extends AbstractIndexComponent {
         return documentMapper;
     }
 
-    @SuppressWarnings({"unchecked"})
+    private String getRemainingFields(Map<String, ?> map) {
+        StringBuilder remainingFields = new StringBuilder();
+        for (String key : map.keySet()) {
+            remainingFields.append(" [").append(key).append(" : ").append(map.get(key).toString()).append("]");
+        }
+        return remainingFields.toString();
+    }
+
+    @SuppressWarnings("unchecked")
+    private void parseTransform(DocumentMapper.Builder docBuilder, Map<String, Object> transformConfig) {
+        String script = (String) transformConfig.remove("script_file");
+        ScriptType scriptType = ScriptType.FILE;
+        if (script == null) {
+            script = (String) transformConfig.remove("script_id");
+            scriptType = ScriptType.INDEXED;
+        }
+        if (script == null) {
+            script = (String) transformConfig.remove("script");
+            scriptType = ScriptType.INLINE;
+        }
+        if (script != null) {
+            String scriptLang = (String) transformConfig.remove("lang");
+            Map<String, Object> params = (Map<String, Object>)transformConfig.remove("params");
+            docBuilder.transform(scriptService, script, scriptType, scriptLang, params);
+        }
+        if (!transformConfig.isEmpty()) {
+            throw new MapperParsingException("Unrecognized parameter in transform config:  " + getRemainingFields(transformConfig));
+        }
+    }
+
     private Tuple<String, Map<String, Object>> extractMapping(String type, String source) throws MapperParsingException {
         Map<String, Object> root;
         try {
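
parseTransform accepts the script inline under `script`, by file name under `script_file`, or by stored-script id under `script_id`, together with optional `lang` and `params`; any other key is rejected. As a sketch, a mapping with an inline script could be built with the same XContentBuilder style the new integration test uses (index, type and the Groovy snippet are illustrative):

[source,java]
--------------------------------------------------
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject()
        .startObject("transform")
            .field("script", "ctx._source['suggest'] = ctx._source['content']") // or "script_file" / "script_id"
            .field("lang", "groovy")
            .field("params", ImmutableMap.of("variable", "not used but an example anyway"))
        .endObject()
        .startObject("properties")
            .startObject("suggest").field("type", "string").endObject()
        .endObject()
    .endObject();
client.admin().indices().prepareCreate("test").addMapping("example", mapping).get();
--------------------------------------------------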

+ 3 - 2
src/main/java/org/elasticsearch/index/mapper/MapperService.java

@@ -127,12 +127,13 @@ public class MapperService extends AbstractIndexComponent  {
 
     @Inject
     public MapperService(Index index, @IndexSettings Settings indexSettings, Environment environment, AnalysisService analysisService, IndexFieldDataService fieldDataService,
-                         PostingsFormatService postingsFormatService, DocValuesFormatService docValuesFormatService, SimilarityLookupService similarityLookupService) {
+                         PostingsFormatService postingsFormatService, DocValuesFormatService docValuesFormatService, SimilarityLookupService similarityLookupService,
+                         ScriptService scriptService) {
         super(index, indexSettings);
         this.analysisService = analysisService;
         this.fieldDataService = fieldDataService;
         this.fieldMappers = new FieldMappersLookup(indexSettings);
-        this.documentParser = new DocumentMapperParser(index, indexSettings, analysisService, postingsFormatService, docValuesFormatService, similarityLookupService);
+        this.documentParser = new DocumentMapperParser(index, indexSettings, analysisService, postingsFormatService, docValuesFormatService, similarityLookupService, scriptService);
         this.searchAnalyzer = new SmartIndexNameSearchAnalyzer(analysisService.defaultSearchAnalyzer());
         this.searchQuoteAnalyzer = new SmartIndexNameSearchQuoteAnalyzer(analysisService.defaultSearchQuoteAnalyzer());
 

+ 35 - 7
src/main/java/org/elasticsearch/search/fetch/source/FetchSourceContext.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.search.fetch.source;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.common.Booleans;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -36,6 +37,7 @@ public class FetchSourceContext implements Streamable {
     public static final FetchSourceContext FETCH_SOURCE = new FetchSourceContext(true);
     public static final FetchSourceContext DO_NOT_FETCH_SOURCE = new FetchSourceContext(false);
     private boolean fetchSource;
+    private boolean transformSource;
     private String[] includes;
     private String[] excludes;
 
@@ -45,7 +47,7 @@ public class FetchSourceContext implements Streamable {
     }
 
     public FetchSourceContext(boolean fetchSource) {
-        this(fetchSource, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);
+        this(fetchSource, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY, false);
     }
 
     public FetchSourceContext(String include) {
@@ -55,21 +57,23 @@ public class FetchSourceContext implements Streamable {
     public FetchSourceContext(String include, String exclude) {
         this(true,
                 include == null ? Strings.EMPTY_ARRAY : new String[]{include},
-                exclude == null ? Strings.EMPTY_ARRAY : new String[]{exclude});
+                exclude == null ? Strings.EMPTY_ARRAY : new String[]{exclude},
+                false);
     }
 
     public FetchSourceContext(String[] includes) {
-        this(true, includes, Strings.EMPTY_ARRAY);
+        this(true, includes, Strings.EMPTY_ARRAY, false);
     }
 
     public FetchSourceContext(String[] includes, String[] excludes) {
-        this(true, includes, excludes);
+        this(true, includes, excludes, false);
     }
 
-    public FetchSourceContext(boolean fetchSource, String[] includes, String[] excludes) {
+    public FetchSourceContext(boolean fetchSource, String[] includes, String[] excludes, boolean transform) {
         this.fetchSource = fetchSource;
         this.includes = includes == null ? Strings.EMPTY_ARRAY : includes;
         this.excludes = excludes == null ? Strings.EMPTY_ARRAY : excludes;
+        this.transformSource = transform;
     }
 
     public boolean fetchSource() {
@@ -81,6 +85,22 @@ public class FetchSourceContext implements Streamable {
         return this;
     }
 
+    /**
+     * Should the document be transformed after the source is loaded?
+     */
+    public boolean transformSource() {
+        return this.transformSource;
+    }
+
+    /**
+     * Should the document be transformed after the source is loaded?
+     * @return this for chaining
+     */
+    public FetchSourceContext transformSource(boolean transformSource) {
+        this.transformSource = transformSource;
+        return this;
+    }
+
     public String[] includes() {
         return this.includes;
     }
@@ -144,8 +164,10 @@ public class FetchSourceContext implements Streamable {
             source_excludes = Strings.splitStringByCommaToArray(sExcludes);
         }
 
-        if (fetchSource != null || source_includes != null || source_excludes != null) {
-            return new FetchSourceContext(fetchSource == null ? true : fetchSource, source_includes, source_excludes);
+        boolean transform = request.paramAsBoolean("_source_transform", false);
+
+        if (fetchSource != null || source_includes != null || source_excludes != null || transform) {
+            return new FetchSourceContext(fetchSource == null ? true : fetchSource, source_includes, source_excludes, transform);
         }
         return null;
     }
@@ -155,6 +177,9 @@ public class FetchSourceContext implements Streamable {
         fetchSource = in.readBoolean();
         includes = in.readStringArray();
         excludes = in.readStringArray();
+        if (in.getVersion().onOrAfter(Version.V_1_3_0)) {
+            transformSource = in.readBoolean();
+        }
     }
 
     @Override
@@ -162,6 +187,9 @@ public class FetchSourceContext implements Streamable {
         out.writeBoolean(fetchSource);
         out.writeStringArray(includes);
         out.writeStringArray(excludes);
+        if (out.getVersion().onOrAfter(Version.V_1_3_0)) {
+            out.writeBoolean(transformSource);
+        }
     }
 
     @Override
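
For completeness, the new flag can also be set when constructing a FetchSourceContext directly (the empty include/exclude arrays are just placeholders); note from the readFrom/writeTo changes above that the flag is only serialized to and from nodes on version 1.3.0 or later:

[source,java]
--------------------------------------------------
// fetch the source, no include/exclude filtering, and re-apply the mapping's transform
FetchSourceContext context = new FetchSourceContext(true, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY, true);
assert context.fetchSource();
assert context.transformSource();
--------------------------------------------------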

+ 10 - 29
src/test/java/org/elasticsearch/index/codec/CodecTests.java

@@ -34,28 +34,19 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
 import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
-import org.elasticsearch.common.inject.AbstractModule;
-import org.elasticsearch.common.inject.Injector;
-import org.elasticsearch.common.inject.ModulesBuilder;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.settings.SettingsModule;
 import org.elasticsearch.common.xcontent.XContentFactory;
-import org.elasticsearch.index.Index;
-import org.elasticsearch.index.IndexNameModule;
-import org.elasticsearch.index.analysis.AnalysisModule;
 import org.elasticsearch.index.codec.docvaluesformat.*;
 import org.elasticsearch.index.codec.postingsformat.*;
 import org.elasticsearch.index.mapper.DocumentMapper;
-import org.elasticsearch.index.mapper.MapperServiceModule;
 import org.elasticsearch.index.mapper.internal.IdFieldMapper;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
 import org.elasticsearch.index.mapper.internal.VersionFieldMapper;
-import org.elasticsearch.index.settings.IndexSettingsModule;
-import org.elasticsearch.index.similarity.SimilarityModule;
-import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
-import org.elasticsearch.indices.fielddata.breaker.NoneCircuitBreakerService;
+import org.elasticsearch.index.service.IndexService;
 import org.elasticsearch.test.ElasticsearchLuceneTestCase;
+import org.elasticsearch.test.ElasticsearchSingleNodeTest;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -74,6 +65,11 @@ public class CodecTests extends ElasticsearchLuceneTestCase {
         forceDefaultCodec(); // we test against default codec so never get a random one here!
     }
 
+    @After
+    public void cleanup() {
+        ElasticsearchSingleNodeTest.cleanup();
+    }
+
     @Test
     public void testResolveDefaultCodecs() throws Exception {
         CodecService codecService = createCodecService();
@@ -409,23 +405,8 @@ public class CodecTests extends ElasticsearchLuceneTestCase {
     }
 
     private static CodecService createCodecService(Settings settings) {
-        Index index = new Index("test");
-        Injector injector = new ModulesBuilder()
-                .add(new SettingsModule(settings))
-                .add(new IndexNameModule(index))
-                .add(new IndexSettingsModule(index, settings))
-                .add(new SimilarityModule(settings))
-                .add(new CodecModule(settings))
-                .add(new MapperServiceModule())
-                .add(new AnalysisModule(settings))
-                .add(new AbstractModule() {
-                    @Override
-                    protected void configure() {
-                        bind(CircuitBreakerService.class).to(NoneCircuitBreakerService.class);
-                    }
-                })
-                .createInjector();
-        return injector.getInstance(CodecService.class);
+        IndexService indexService = ElasticsearchSingleNodeTest.createIndex("test", settings);
+        return indexService.injector().getInstance(CodecService.class);
     }
 
 }

+ 3 - 3
src/test/java/org/elasticsearch/index/mapper/MapperTestUtils.java

@@ -46,12 +46,12 @@ public class MapperTestUtils {
 
     public static DocumentMapperParser newParser() {
         return new DocumentMapperParser(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS, newAnalysisService(), new PostingsFormatService(new Index("test")),
-                new DocValuesFormatService(new Index("test")), newSimilarityLookupService());
+                new DocValuesFormatService(new Index("test")), newSimilarityLookupService(), null);
     }
 
     public static DocumentMapperParser newParser(Settings indexSettings) {
         return new DocumentMapperParser(new Index("test"), indexSettings, newAnalysisService(indexSettings), new PostingsFormatService(new Index("test")),
-                new DocValuesFormatService(new Index("test")), newSimilarityLookupService());
+                new DocValuesFormatService(new Index("test")), newSimilarityLookupService(), null);
     }
 
     public static MapperService newMapperService() {
@@ -60,7 +60,7 @@ public class MapperTestUtils {
 
     public static MapperService newMapperService(Index index, Settings indexSettings) {
         return new MapperService(index, indexSettings, new Environment(), newAnalysisService(), new IndexFieldDataService(index, new NoneCircuitBreakerService()),
-                new PostingsFormatService(index), new DocValuesFormatService(index), newSimilarityLookupService());
+                new PostingsFormatService(index), new DocValuesFormatService(index), newSimilarityLookupService(), null);
     }
 
     public static AnalysisService newAnalysisService() {

+ 133 - 0
src/test/java/org/elasticsearch/index/mapper/TransformOnIndexMapperIntegrationTest.java

@@ -0,0 +1,133 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper;
+
+import com.google.common.collect.ImmutableMap;
+import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+
+import static org.elasticsearch.index.query.QueryBuilders.termQuery;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Tests for transforming the source document before indexing.
+ */
+public class TransformOnIndexMapperIntegrationTest extends ElasticsearchIntegrationTest {
+    @Test
+    public void searchOnTransformed() throws Exception {
+        setup(false);
+
+        // Searching by the field created in the transform finds the entry
+        SearchResponse response = client().prepareSearch("test").setQuery(termQuery("destination", "findme")).get();
+        assertSearchHits(response, "righttitle");
+        // The field built by the transform isn't in the returned source but the
+        // field the transform removed still is, even though it wasn't indexed!
+        assertRightTitleSourceUntransformed(response.getHits().getAt(0).sourceAsMap());
+
+        // Can't find by a field removed from the document by the transform
+        response = client().prepareSearch("test").setQuery(termQuery("content", "findme")).get();
+        assertHitCount(response, 0);
+    }
+
+    @Test
+    public void getTransformed() throws Exception {
+        setup(getRandom().nextBoolean());
+        GetResponse response = client().prepareGet("test", "test", "righttitle").get();
+        assertExists(response);
+        assertRightTitleSourceUntransformed(response.getSource());
+
+        response = client().prepareGet("test", "test", "righttitle").setTransformSource(true).get();
+        assertExists(response);
+        assertRightTitleSourceTransformed(response.getSource());
+    }
+
+    /**
+     * Sets up an index with some source transforms. Randomly picks the number of
+     * transforms, but all of them except one are noops. The remaining transform is a
+     * script that fills the 'destination' field with the 'content' field only
+     * if the 'title' field starts with 't' and then always removes the
+     * 'content' field regardless of the contents of 'title'. The actual script
+     * randomly uses parameters or not.
+     * 
+     * @param flush
+     *            should the data be flushed to disk? Set to false to test real
+     *            time fetching
+     */
+    private void setup(boolean flush) throws IOException, InterruptedException, ExecutionException {
+        XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
+        builder.field("transform");
+        if (getRandom().nextBoolean()) {
+            // Single transform
+            builder.startObject();
+            buildTransformScript(builder);
+            builder.field("lang", "groovy");
+            builder.endObject();
+        } else {
+            // Multiple transforms
+            int total = between(1, 10);
+            int actual = between(0, total - 1);
+            builder.startArray();
+            for (int s = 0; s < total; s++) {
+                builder.startObject();
+                if (s == actual) {
+                    buildTransformScript(builder);
+                } else {
+                    builder.field("script", "true");
+                }
+                builder.field("lang", "groovy");
+                builder.endObject();
+            }
+            builder.endArray();
+        }
+        assertAcked(client().admin().indices().prepareCreate("test").addMapping("test", builder));
+
+        indexRandom(!flush, client().prepareIndex("test", "test", "notitle").setSource("content", "findme"),
+                client().prepareIndex("test", "test", "badtitle").setSource("content", "findme", "title", "cat"),
+                client().prepareIndex("test", "test", "righttitle").setSource("content", "findme", "title", "table"));
+    }
+
+    private void buildTransformScript(XContentBuilder builder) throws IOException {
+        String script = "if (ctx._source['title']?.startsWith('t')) { ctx._source['destination'] = ctx._source[sourceField] }; ctx._source.remove(sourceField);";
+        if (getRandom().nextBoolean()) {
+            script = script.replace("sourceField", "'content'");
+        } else {
+            builder.field("params", ImmutableMap.of("sourceField", "content"));
+        }
+        builder.field("script", script);
+    }
+
+    private void assertRightTitleSourceUntransformed(Map<String, Object> source) {
+        assertThat(source, both(hasEntry("content", (Object) "findme")).and(not(hasKey("destination"))));
+    }
+
+    private void assertRightTitleSourceTransformed(Map<String, Object> source) {
+        assertThat(source, both(hasEntry("destination", (Object) "findme")).and(not(hasKey("content"))));
+    }
+
+}

+ 6 - 2
src/test/java/org/elasticsearch/test/ElasticsearchSingleNodeTest.java

@@ -49,8 +49,7 @@ public abstract class ElasticsearchSingleNodeTest extends ElasticsearchTestCase
 
     private static final Node node = newNode();
 
-    @After
-    public void after() {
+    public static void cleanup() {
         node.client().admin().indices().prepareDelete("*").get();
         MetaData metaData = node.client().admin().cluster().prepareState().get().getState().getMetaData();
         assertThat("test leaves persistent cluster metadata behind: " + metaData.persistentSettings().getAsMap(),
@@ -59,6 +58,11 @@ public abstract class ElasticsearchSingleNodeTest extends ElasticsearchTestCase
                 metaData.transientSettings().getAsMap().size(), equalTo(0));
     }
 
+    @After
+    public void after() {
+        cleanup();
+    }
+
     private static Node newNode() {
         Node build = NodeBuilder.nodeBuilder().local(true).data(true).settings(ImmutableSettings.builder()
                 .put(ClusterName.SETTING, ElasticsearchSingleNodeTest.class.getName())