|  | @@ -0,0 +1,172 @@
 | 
	
		
			
				|  |  | +/*
 | 
	
		
			
				|  |  | + * Licensed to Elasticsearch under one or more contributor
 | 
	
		
			
				|  |  | + * license agreements. See the NOTICE file distributed with
 | 
	
		
			
				|  |  | + * this work for additional information regarding copyright
 | 
	
		
			
				|  |  | + * ownership. Elasticsearch licenses this file to you under
 | 
	
		
			
				|  |  | + * the Apache License, Version 2.0 (the "License"); you may
 | 
	
		
			
				|  |  | + * not use this file except in compliance with the License.
 | 
	
		
			
				|  |  | + * You may obtain a copy of the License at
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + *    http://www.apache.org/licenses/LICENSE-2.0
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + * Unless required by applicable law or agreed to in writing,
 | 
	
		
			
				|  |  | + * software distributed under the License is distributed on an
 | 
	
		
			
				|  |  | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 | 
	
		
			
				|  |  | + * KIND, either express or implied.  See the License for the
 | 
	
		
			
				|  |  | + * specific language governing permissions and limitations
 | 
	
		
			
				|  |  | + * under the License.
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +package org.elasticsearch.client.analytics;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +import org.elasticsearch.common.ParseField;
 | 
	
		
			
				|  |  | +import org.elasticsearch.common.xcontent.ConstructingObjectParser;
 | 
	
		
			
				|  |  | +import org.elasticsearch.common.xcontent.XContentBuilder;
 | 
	
		
			
				|  |  | +import org.elasticsearch.common.xcontent.XContentParser;
 | 
	
		
			
				|  |  | +import org.elasticsearch.search.aggregations.ParsedAggregation;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +import java.io.IOException;
 | 
	
		
			
				|  |  | +import java.util.HashMap;
 | 
	
		
			
				|  |  | +import java.util.Map;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +import static java.util.Collections.unmodifiableMap;
 | 
	
		
			
				|  |  | +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
 | 
	
		
			
				|  |  | +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | + * Results from the {@code string_stats} aggregation.
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +public class ParsedStringStats extends ParsedAggregation {
 | 
	
		
			
				|  |  | +    private static final ParseField COUNT_FIELD = new ParseField("count");
 | 
	
		
			
				|  |  | +    private static final ParseField MIN_LENGTH_FIELD = new ParseField("min_length");
 | 
	
		
			
				|  |  | +    private static final ParseField MAX_LENGTH_FIELD = new ParseField("max_length");
 | 
	
		
			
				|  |  | +    private static final ParseField AVG_LENGTH_FIELD = new ParseField("avg_length");
 | 
	
		
			
				|  |  | +    private static final ParseField ENTROPY_FIELD = new ParseField("entropy");
 | 
	
		
			
				|  |  | +    private static final ParseField DISTRIBUTION_FIELD = new ParseField("distribution");
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    private final long count;
 | 
	
		
			
				|  |  | +    private final int minLength;
 | 
	
		
			
				|  |  | +    private final int maxLength;
 | 
	
		
			
				|  |  | +    private final double avgLength;
 | 
	
		
			
				|  |  | +    private final double entropy;
 | 
	
		
			
				|  |  | +    private final boolean showDistribution;
 | 
	
		
			
				|  |  | +    private final Map<String, Double> distribution;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    private ParsedStringStats(String name, long count, int minLength, int maxLength, double avgLength, double entropy,
 | 
	
		
			
				|  |  | +            boolean showDistribution, Map<String, Double> distribution) {
 | 
	
		
			
				|  |  | +        setName(name);
 | 
	
		
			
				|  |  | +        this.count = count;
 | 
	
		
			
				|  |  | +        this.minLength = minLength;
 | 
	
		
			
				|  |  | +        this.maxLength = maxLength;
 | 
	
		
			
				|  |  | +        this.avgLength = avgLength;
 | 
	
		
			
				|  |  | +        this.entropy = entropy;
 | 
	
		
			
				|  |  | +        this.showDistribution = showDistribution;
 | 
	
		
			
				|  |  | +        this.distribution = distribution;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /**
 | 
	
		
			
				|  |  | +     * The number of non-empty fields counted.
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    public long getCount() {
 | 
	
		
			
				|  |  | +        return count;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /**
 | 
	
		
			
				|  |  | +     * The length of the shortest term.
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    public int getMinLength() {
 | 
	
		
			
				|  |  | +        return minLength;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /**
 | 
	
		
			
				|  |  | +     * The length of the longest term.
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    public int getMaxLength() {
 | 
	
		
			
				|  |  | +        return maxLength;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /**
 | 
	
		
			
				|  |  | +     * The average length computed over all terms.
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    public double getAvgLength() {
 | 
	
		
			
				|  |  | +        return avgLength;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /**
 | 
	
		
			
				|  |  | +     * The <a href="https://en.wikipedia.org/wiki/Entropy_(information_theory)">Shannon Entropy</a>
 | 
	
		
			
				|  |  | +     * value computed over all terms collected by the aggregation.
 | 
	
		
			
				|  |  | +     * Shannon entropy quantifies the amount of information contained in
 | 
	
		
			
				|  |  | +     * the field. It is a very useful metric for measuring a wide range of
 | 
	
		
			
				|  |  | +     * properties of a data set, such as diversity, similarity,
 | 
	
		
			
				|  |  | +     * randomness etc.
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    public double getEntropy() {
 | 
	
		
			
				|  |  | +        return entropy;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /**
 | 
	
		
			
				|  |  | +     * The probability distribution for all characters. {@code null} unless
 | 
	
		
			
				|  |  | +     * explicitly requested with {@link StringStatsAggregationBuilder#showDistribution(boolean)}.
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    public Map<String, Double> getDistribution() {
 | 
	
		
			
				|  |  | +        return distribution;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    @Override
 | 
	
		
			
				|  |  | +    public String getType() {
 | 
	
		
			
				|  |  | +        return StringStatsAggregationBuilder.NAME;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    private static final Object NULL_DISTRIBUTION_MARKER = new Object();
 | 
	
		
			
				|  |  | +    public static final ConstructingObjectParser<ParsedStringStats, String> PARSER = new ConstructingObjectParser<>(
 | 
	
		
			
				|  |  | +            StringStatsAggregationBuilder.NAME, true, (args, name) -> {
 | 
	
		
			
				|  |  | +                long count = (long) args[0];
 | 
	
		
			
				|  |  | +                boolean disributionWasExplicitNull = args[5] == NULL_DISTRIBUTION_MARKER;
 | 
	
		
			
				|  |  | +                if (count == 0) {
 | 
	
		
			
				|  |  | +                    return new ParsedStringStats(name, count, 0, 0, 0, 0, disributionWasExplicitNull, null);
 | 
	
		
			
				|  |  | +                }
 | 
	
		
			
				|  |  | +                int minLength = (int) args[1];
 | 
	
		
			
				|  |  | +                int maxLength = (int) args[2];
 | 
	
		
			
				|  |  | +                double averageLength = (double) args[3];
 | 
	
		
			
				|  |  | +                double entropy = (double) args[4];
 | 
	
		
			
				|  |  | +                if (disributionWasExplicitNull) {
 | 
	
		
			
				|  |  | +                    return new ParsedStringStats(name, count, minLength, maxLength, averageLength, entropy,
 | 
	
		
			
				|  |  | +                            disributionWasExplicitNull, null);
 | 
	
		
			
				|  |  | +                } else {
 | 
	
		
			
				|  |  | +                    @SuppressWarnings("unchecked")
 | 
	
		
			
				|  |  | +                    Map<String, Double> distribution = (Map<String, Double>) args[5];
 | 
	
		
			
				|  |  | +                    return new ParsedStringStats(name, count, minLength, maxLength, averageLength, entropy,
 | 
	
		
			
				|  |  | +                            distribution != null, distribution);
 | 
	
		
			
				|  |  | +                }
 | 
	
		
			
				|  |  | +            });
 | 
	
		
			
				|  |  | +    static {
 | 
	
		
			
				|  |  | +        PARSER.declareLong(constructorArg(), COUNT_FIELD);
 | 
	
		
			
				|  |  | +        PARSER.declareIntOrNull(constructorArg(), 0, MIN_LENGTH_FIELD);
 | 
	
		
			
				|  |  | +        PARSER.declareIntOrNull(constructorArg(), 0, MAX_LENGTH_FIELD);
 | 
	
		
			
				|  |  | +        PARSER.declareDoubleOrNull(constructorArg(), 0, AVG_LENGTH_FIELD);
 | 
	
		
			
				|  |  | +        PARSER.declareDoubleOrNull(constructorArg(), 0, ENTROPY_FIELD);
 | 
	
		
			
				|  |  | +        PARSER.declareObjectOrNull(optionalConstructorArg(), (p, c) -> unmodifiableMap(p.map(HashMap::new, XContentParser::doubleValue)),
 | 
	
		
			
				|  |  | +                NULL_DISTRIBUTION_MARKER, DISTRIBUTION_FIELD);
 | 
	
		
			
				|  |  | +        ParsedAggregation.declareAggregationFields(PARSER);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    @Override
 | 
	
		
			
				|  |  | +    protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
 | 
	
		
			
				|  |  | +        builder.field(COUNT_FIELD.getPreferredName(), count);
 | 
	
		
			
				|  |  | +        if (count == 0) {
 | 
	
		
			
				|  |  | +            builder.nullField(MIN_LENGTH_FIELD.getPreferredName());
 | 
	
		
			
				|  |  | +            builder.nullField(MAX_LENGTH_FIELD.getPreferredName());
 | 
	
		
			
				|  |  | +            builder.nullField(AVG_LENGTH_FIELD.getPreferredName());
 | 
	
		
			
				|  |  | +            builder.field(ENTROPY_FIELD.getPreferredName(), 0.0);
 | 
	
		
			
				|  |  | +        } else {
 | 
	
		
			
				|  |  | +            builder.field(MIN_LENGTH_FIELD.getPreferredName(), minLength);
 | 
	
		
			
				|  |  | +            builder.field(MAX_LENGTH_FIELD.getPreferredName(), maxLength);
 | 
	
		
			
				|  |  | +            builder.field(AVG_LENGTH_FIELD.getPreferredName(), avgLength);
 | 
	
		
			
				|  |  | +            builder.field(ENTROPY_FIELD.getPreferredName(), entropy);
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        if (showDistribution) {
 | 
	
		
			
				|  |  | +            builder.field(DISTRIBUTION_FIELD.getPreferredName(), distribution);
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        return builder;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +}
 |