|
@@ -9,7 +9,11 @@ package org.elasticsearch.xpack.ml.aggs.frequentitemsets.mr;
|
|
|
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
import org.apache.logging.log4j.Logger;
|
|
|
+import org.apache.lucene.index.BinaryDocValues;
|
|
|
+import org.apache.lucene.index.DocValues;
|
|
|
import org.apache.lucene.index.LeafReaderContext;
|
|
|
+import org.apache.lucene.index.NumericDocValues;
|
|
|
+import org.apache.lucene.index.SortedDocValues;
|
|
|
import org.apache.lucene.index.SortedNumericDocValues;
|
|
|
import org.apache.lucene.index.SortedSetDocValues;
|
|
|
import org.apache.lucene.util.BytesRef;
|
|
@@ -19,6 +23,7 @@ import org.elasticsearch.common.io.stream.StreamInput;
|
|
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
|
|
import org.elasticsearch.common.io.stream.Writeable;
|
|
|
import org.elasticsearch.core.Tuple;
|
|
|
+import org.elasticsearch.index.fielddata.FieldData;
|
|
|
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
|
|
import org.elasticsearch.search.DocValueFormat;
|
|
|
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
|
|
@@ -204,15 +209,19 @@ public abstract class ItemSetMapReduceValueSource {
|
|
|
@Override
|
|
|
public ValueCollector getValueCollector(LeafReaderContext ctx) throws IOException {
|
|
|
this.docValues = source.globalOrdinalsValues(ctx);
|
|
|
- ;
|
|
|
- final Tuple<Field, List<Object>> empty = new Tuple<>(field, Collections.emptyList());
|
|
|
final SortedSetDocValues values = this.docValues;
|
|
|
+ final SortedDocValues singleton = DocValues.unwrapSingleton(values);
|
|
|
+ final Tuple<Field, List<Object>> empty = new Tuple<>(field, Collections.emptyList());
|
|
|
+ return singleton != null ? getValueCollector(singleton, empty) : getValueCollector(values, empty);
|
|
|
+ }
|
|
|
+
|
|
|
+ private ValueCollector getValueCollector(SortedSetDocValues values, Tuple<Field, List<Object>> empty) {
|
|
|
return doc -> {
|
|
|
if (values.advanceExact(doc)) {
|
|
|
- int valuesCount = values.docValueCount();
|
|
|
+ final int valuesCount = values.docValueCount();
|
|
|
|
|
|
if (valuesCount == 1) {
|
|
|
- long v = values.nextOrd();
|
|
|
+ final long v = values.nextOrd();
|
|
|
assert v >= 0;
|
|
|
if (bitSetFilter == null || bitSetFilter.get(v)) {
|
|
|
return new Tuple<>(field, Collections.singletonList(v));
|
|
@@ -220,14 +229,9 @@ public abstract class ItemSetMapReduceValueSource {
|
|
|
return empty;
|
|
|
}
|
|
|
|
|
|
- if (valuesCount == 0) {
|
|
|
- return empty;
|
|
|
- }
|
|
|
-
|
|
|
- List<Object> objects = new ArrayList<>(valuesCount);
|
|
|
-
|
|
|
+ final List<Object> objects = new ArrayList<>(valuesCount);
|
|
|
for (int i = 0; i < valuesCount; ++i) {
|
|
|
- long v = values.nextOrd();
|
|
|
+ final long v = values.nextOrd();
|
|
|
assert v >= 0;
|
|
|
if (bitSetFilter == null || bitSetFilter.get(v)) {
|
|
|
objects.add(v);
|
|
@@ -239,6 +243,20 @@ public abstract class ItemSetMapReduceValueSource {
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ private ValueCollector getValueCollector(SortedDocValues values, Tuple<Field, List<Object>> empty) {
|
|
|
+ return doc -> {
|
|
|
+ if (values.advanceExact(doc)) {
|
|
|
+ final long v = values.ordValue();
|
|
|
+ assert v >= 0;
|
|
|
+ if (bitSetFilter == null || bitSetFilter.get(v)) {
|
|
|
+ return new Tuple<>(field, Collections.singletonList(v));
|
|
|
+ }
|
|
|
+ return empty;
|
|
|
+ }
|
|
|
+ return empty;
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public boolean usesOrdinals() {
|
|
|
return true;
|
|
@@ -265,28 +283,27 @@ public abstract class ItemSetMapReduceValueSource {
|
|
|
@Override
|
|
|
public ValueCollector getValueCollector(LeafReaderContext ctx) throws IOException {
|
|
|
final SortedBinaryDocValues values = source.bytesValues(ctx);
|
|
|
+ final BinaryDocValues singleton = FieldData.unwrapSingleton(values);
|
|
|
final Tuple<Field, List<Object>> empty = new Tuple<>(field, Collections.emptyList());
|
|
|
+ return singleton != null ? getValueCollector(singleton, empty) : getValueCollector(values, empty);
|
|
|
+ }
|
|
|
|
|
|
+ private ValueCollector getValueCollector(SortedBinaryDocValues values, Tuple<Field, List<Object>> empty) {
|
|
|
return doc -> {
|
|
|
if (values.advanceExact(doc)) {
|
|
|
- int valuesCount = values.docValueCount();
|
|
|
+ final int valuesCount = values.docValueCount();
|
|
|
|
|
|
if (valuesCount == 1) {
|
|
|
- BytesRef v = values.nextValue();
|
|
|
+ final BytesRef v = values.nextValue();
|
|
|
if (stringFilter == null || stringFilter.accept(v)) {
|
|
|
return new Tuple<>(field, Collections.singletonList(BytesRef.deepCopyOf(v)));
|
|
|
}
|
|
|
return empty;
|
|
|
}
|
|
|
|
|
|
- if (valuesCount == 0) {
|
|
|
- return empty;
|
|
|
- }
|
|
|
-
|
|
|
- List<Object> objects = new ArrayList<>(valuesCount);
|
|
|
-
|
|
|
+ final List<Object> objects = new ArrayList<>(valuesCount);
|
|
|
for (int i = 0; i < valuesCount; ++i) {
|
|
|
- BytesRef v = values.nextValue();
|
|
|
+ final BytesRef v = values.nextValue();
|
|
|
if (stringFilter == null || stringFilter.accept(v)) {
|
|
|
objects.add(BytesRef.deepCopyOf(v));
|
|
|
}
|
|
@@ -297,6 +314,19 @@ public abstract class ItemSetMapReduceValueSource {
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ private ValueCollector getValueCollector(BinaryDocValues values, Tuple<Field, List<Object>> empty) {
|
|
|
+ return doc -> {
|
|
|
+ if (values.advanceExact(doc)) {
|
|
|
+ final BytesRef v = values.binaryValue();
|
|
|
+ if (stringFilter == null || stringFilter.accept(v)) {
|
|
|
+ return new Tuple<>(field, Collections.singletonList(BytesRef.deepCopyOf(v)));
|
|
|
+ }
|
|
|
+ return empty;
|
|
|
+ }
|
|
|
+ return empty;
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public boolean usesOrdinals() {
|
|
|
return false;
|
|
@@ -374,29 +404,28 @@ public abstract class ItemSetMapReduceValueSource {
|
|
|
@Override
|
|
|
ValueCollector getValueCollector(LeafReaderContext ctx) throws IOException {
|
|
|
final SortedNumericDocValues values = source.longValues(ctx);
|
|
|
+ final NumericDocValues singleton = DocValues.unwrapSingleton(values);
|
|
|
final Field field = getField();
|
|
|
final Tuple<Field, List<Object>> empty = new Tuple<>(field, Collections.emptyList());
|
|
|
+ return singleton != null ? getValueCollector(singleton, empty, field) : getValueCollector(values, empty, field);
|
|
|
+ }
|
|
|
|
|
|
+ private ValueCollector getValueCollector(SortedNumericDocValues values, Tuple<Field, List<Object>> empty, Field field) {
|
|
|
return doc -> {
|
|
|
if (values.advanceExact(doc)) {
|
|
|
- int valuesCount = values.docValueCount();
|
|
|
+ final int valuesCount = values.docValueCount();
|
|
|
|
|
|
if (valuesCount == 1) {
|
|
|
- long v = values.nextValue();
|
|
|
+ final long v = values.nextValue();
|
|
|
if (longFilter == null || longFilter.accept(v)) {
|
|
|
- return new Tuple<>(getField(), Collections.singletonList(v));
|
|
|
+ return new Tuple<>(field, Collections.singletonList(v));
|
|
|
}
|
|
|
return empty;
|
|
|
}
|
|
|
|
|
|
- if (valuesCount == 0) {
|
|
|
- return empty;
|
|
|
- }
|
|
|
-
|
|
|
- List<Object> objects = new ArrayList<>(valuesCount);
|
|
|
-
|
|
|
+ final List<Object> objects = new ArrayList<>(valuesCount);
|
|
|
for (int i = 0; i < valuesCount; ++i) {
|
|
|
- long v = values.nextValue();
|
|
|
+ final long v = values.nextValue();
|
|
|
if (longFilter == null || longFilter.accept(v)) {
|
|
|
objects.add(v);
|
|
|
}
|
|
@@ -407,5 +436,17 @@ public abstract class ItemSetMapReduceValueSource {
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ private ValueCollector getValueCollector(NumericDocValues values, Tuple<Field, List<Object>> empty, Field field) {
|
|
|
+ return doc -> {
|
|
|
+ if (values.advanceExact(doc)) {
|
|
|
+ final long v = values.longValue();
|
|
|
+ if (longFilter == null || longFilter.accept(v)) {
|
|
|
+ return new Tuple<>(field, Collections.singletonList(v));
|
|
|
+ }
|
|
|
+ return empty;
|
|
|
+ }
|
|
|
+ return empty;
|
|
|
+ };
|
|
|
+ }
|
|
|
}
|
|
|
}
|