summaryrefslogtreecommitdiff
path: root/core/src/main/java/org/elasticsearch/search
diff options
context:
space:
mode:
authorJim Ferenczi <jim.ferenczi@elastic.co>2017-05-30 19:13:07 +0200
committerGitHub <noreply@github.com>2017-05-30 19:13:07 +0200
commitce7195d81abafd0a880ebbc2d1514ab608e56b79 (patch)
tree45f67b453675691f237f2519748186c3cb9e7b5b /core/src/main/java/org/elasticsearch/search
parent2a6e6866bd6a25f469c07ba0766edec6272e93ac (diff)
Terms aggregation should remap global ordinal buckets when a sub-aggregator is used to sort the terms (#24941)
`terms` aggregations at the root level use the `global_ordinals` execution hint by default. When all sub-aggregators can be run in `breadth_first` mode the collected buckets for these sub-aggs are dense (remapped after the initial pruning). But if a sub-aggregator is not deferrable and needs to collect all buckets before pruning we don't remap global ords and the aggregator needs to deal with sparse buckets. Most (if not all) aggregators expect dense buckets and uses this information to allocate memories. This change forces the remap of the global ordinals but only when there is at least one sub-aggregator that cannot be deferred. Relates #24788
Diffstat (limited to 'core/src/main/java/org/elasticsearch/search')
-rw-r--r--core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java89
-rw-r--r--core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java97
-rw-r--r--core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java254
-rw-r--r--core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java127
-rw-r--r--core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java12
5 files changed, 299 insertions, 280 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java
index 98effdcfd5..50f114b35b 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java
@@ -53,22 +53,28 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
protected final SignificantTermsAggregatorFactory termsAggFactory;
private final SignificanceHeuristic significanceHeuristic;
- public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories,
- ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, DocValueFormat format,
- BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
- SearchContext context, Aggregator parent,
- SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggFactory,
- List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
-
+ public GlobalOrdinalsSignificantTermsAggregator(String name,
+ AggregatorFactories factories,
+ ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
+ DocValueFormat format,
+ BucketCountThresholds bucketCountThresholds,
+ IncludeExclude.OrdinalsFilter includeExclude,
+ SearchContext context,
+ Aggregator parent,
+ boolean forceRemapGlobalOrds,
+ SignificanceHeuristic significanceHeuristic,
+ SignificantTermsAggregatorFactory termsAggFactory,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, context, parent,
- SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData);
+ forceRemapGlobalOrds, SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData);
this.significanceHeuristic = significanceHeuristic;
this.termsAggFactory = termsAggFactory;
+ this.numCollectedDocs = 0;
}
@Override
- public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
- final LeafBucketCollector sub) throws IOException {
+ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) {
@Override
public void collect(int doc, long bucket) throws IOException {
@@ -78,18 +84,17 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
};
}
-
@Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
assert owningBucketOrdinal == 0;
- if (globalOrds == null) { // no context in this reader
+ if (valueCount == 0) { // no context in this reader
return buildEmptyAggregation();
}
final int size;
if (bucketCountThresholds.getMinDocCount() == 0) {
// if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
- size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
+ size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize());
} else {
size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
}
@@ -98,7 +103,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
SignificantStringTerms.Bucket spare = null;
- for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
+ for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
continue;
}
@@ -115,7 +120,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
}
spare.bucketOrd = bucketOrd;
- copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes);
+ copy(lookupGlobalOrd.apply(globalTermOrd), spare.termBytes);
spare.subsetDf = bucketDocCount;
spare.subsetSize = subsetSize;
spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
@@ -148,63 +153,13 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
- pipelineAggregators(), metaData(), format, 0, supersetSize, significanceHeuristic, emptyList());
+ pipelineAggregators(), metaData(), format, numCollectedDocs, supersetSize, significanceHeuristic, emptyList());
}
@Override
protected void doClose() {
+ super.doClose();
Releasables.close(termsAggFactory);
}
-
- public static class WithHash extends GlobalOrdinalsSignificantTermsAggregator {
-
- private final LongHash bucketOrds;
-
- public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
- DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
- SearchContext context, Aggregator parent, SignificanceHeuristic significanceHeuristic,
- SignificantTermsAggregatorFactory termsAggFactory, List<PipelineAggregator> pipelineAggregators,
- Map<String, Object> metaData) throws IOException {
- super(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic,
- termsAggFactory, pipelineAggregators, metaData);
- bucketOrds = new LongHash(1, context.bigArrays());
- }
-
- @Override
- public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
- final LeafBucketCollector sub) throws IOException {
- return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) {
- @Override
- public void collect(int doc, long bucket) throws IOException {
- assert bucket == 0;
- numCollectedDocs++;
- if (globalOrds.advanceExact(doc)) {
- for (long globalOrd = globalOrds.nextOrd();
- globalOrd != SortedSetDocValues.NO_MORE_ORDS;
- globalOrd = globalOrds.nextOrd()) {
- long bucketOrd = bucketOrds.add(globalOrd);
- if (bucketOrd < 0) {
- bucketOrd = -1 - bucketOrd;
- collectExistingBucket(sub, doc, bucketOrd);
- } else {
- collectBucket(sub, doc, bucketOrd);
- }
- }
- }
- }
- };
- }
-
- @Override
- protected long getBucketOrd(long termOrd) {
- return bucketOrds.find(termOrd);
- }
-
- @Override
- protected void doClose() {
- Releasables.close(termsAggFactory, bucketOrds);
- }
- }
-
}
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java
index c27cabf78b..ba5191b122 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java
@@ -70,10 +70,17 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private final SignificanceHeuristic significanceHeuristic;
- public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig<ValuesSource> config, IncludeExclude includeExclude,
- String executionHint, QueryBuilder filterBuilder, TermsAggregator.BucketCountThresholds bucketCountThresholds,
- SignificanceHeuristic significanceHeuristic, SearchContext context, AggregatorFactory<?> parent,
- AggregatorFactories.Builder subFactoriesBuilder, Map<String, Object> metaData) throws IOException {
+ public SignificantTermsAggregatorFactory(String name,
+ ValuesSourceConfig<ValuesSource> config,
+ IncludeExclude includeExclude,
+ String executionHint,
+ QueryBuilder filterBuilder,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ SignificanceHeuristic significanceHeuristic,
+ SearchContext context,
+ AggregatorFactory<?> parent,
+ AggregatorFactories.Builder subFactoriesBuilder,
+ Map<String, Object> metaData) throws IOException {
super(name, config, context, parent, subFactoriesBuilder, metaData);
this.includeExclude = includeExclude;
this.executionHint = executionHint;
@@ -246,44 +253,71 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
MAP(new ParseField("map")) {
@Override
- Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
- TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
- SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
- Map<String, Object> metaData) throws IOException {
+ Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext aggregationContext,
+ Aggregator parent,
+ SignificanceHeuristic significanceHeuristic,
+ SignificantTermsAggregatorFactory termsAggregatorFactory,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
+
}
},
GLOBAL_ORDINALS(new ParseField("global_ordinals")) {
@Override
- Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
- TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
- SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
- Map<String, Object> metaData) throws IOException {
+ Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext aggregationContext,
+ Aggregator parent,
+ SignificanceHeuristic significanceHeuristic,
+ SignificantTermsAggregatorFactory termsAggregatorFactory,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
- aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
+ aggregationContext, parent, false, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
+
}
},
GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {
@Override
- Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
- TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
- SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
- Map<String, Object> metaData) throws IOException {
+ Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext aggregationContext,
+ Aggregator parent,
+ SignificanceHeuristic significanceHeuristic,
+ SignificantTermsAggregatorFactory termsAggregatorFactory,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
- return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories,
- (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
- aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
+ return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
+ (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent,
+ true, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
+
}
};
@@ -302,11 +336,18 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
this.parseField = parseField;
}
- abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
- TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
- SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
- Map<String, Object> metaData) throws IOException;
+ abstract Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext aggregationContext,
+ Aggregator parent,
+ SignificanceHeuristic significanceHeuristic,
+ SignificantTermsAggregatorFactory termsAggregatorFactory,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException;
@Override
public String toString() {
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
index 33bbc370c6..d9c60da7ac 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
@@ -20,6 +20,7 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
@@ -52,6 +53,8 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
/**
* An aggregator of string values that relies on global ordinals in order to build buckets.
*/
@@ -66,67 +69,104 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
// first defined one.
// So currently for each instance of this aggregator the acceptedglobalValues will be computed, this is unnecessary
// especially if this agg is on a second layer or deeper.
- protected LongBitSet acceptedGlobalOrdinals;
+ protected final LongBitSet acceptedGlobalOrdinals;
+ protected final long valueCount;
+ protected final GlobalOrdLookupFunction lookupGlobalOrd;
+
+ private final LongHash bucketOrds;
- protected SortedSetDocValues globalOrds;
+ public interface GlobalOrdLookupFunction {
+ BytesRef apply(long ord) throws IOException;
+ }
- public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
- BucketOrder order, DocValueFormat format, BucketCountThresholds bucketCountThresholds,
- IncludeExclude.OrdinalsFilter includeExclude, SearchContext context, Aggregator parent,
- SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
- Map<String, Object> metaData) throws IOException {
+ public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories,
+ ValuesSource.Bytes.WithOrdinals valuesSource,
+ BucketOrder order,
+ DocValueFormat format,
+ BucketCountThresholds bucketCountThresholds,
+ IncludeExclude.OrdinalsFilter includeExclude,
+ SearchContext context,
+ Aggregator parent,
+ boolean forceRemapGlobalOrds,
+ SubAggCollectionMode collectionMode,
+ boolean showTermDocCountError,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
super(name, factories, context, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError,
pipelineAggregators, metaData);
this.valuesSource = valuesSource;
this.includeExclude = includeExclude;
+ final IndexReader reader = context.searcher().getIndexReader();
+ final SortedSetDocValues values = reader.leaves().size() > 0 ?
+ valuesSource.globalOrdinalsValues(context.searcher().getIndexReader().leaves().get(0)) : DocValues.emptySortedSet();
+ this.valueCount = values.getValueCount();
+ this.lookupGlobalOrd = values::lookupOrd;
+ this.acceptedGlobalOrdinals = includeExclude != null ? includeExclude.acceptedGlobalOrdinals(values) : null;
+
+ /**
+ * Remap global ords to dense bucket ordinals if any sub-aggregator cannot be deferred.
+ * Sub-aggregators expect dense buckets and allocate memories based on this assumption.
+ * Deferred aggregators are safe because the selected ordinals are remapped when the buckets
+ * are replayed.
+ */
+ boolean remapGlobalOrds = forceRemapGlobalOrds || Arrays.stream(subAggregators).anyMatch((a) -> shouldDefer(a) == false);
+ this.bucketOrds = remapGlobalOrds ? new LongHash(1, context.bigArrays()) : null;
}
- protected long getBucketOrd(long termOrd) {
- return termOrd;
- }
-
- @Override
- public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
- final LeafBucketCollector sub) throws IOException {
- globalOrds = valuesSource.globalOrdinalsValues(ctx);
+ boolean remapGlobalOrds() {
+ return bucketOrds != null;
+ }
- if (acceptedGlobalOrdinals == null && includeExclude != null) {
- acceptedGlobalOrdinals = includeExclude.acceptedGlobalOrdinals(globalOrds);
- }
+ protected final long getBucketOrd(long globalOrd) {
+ return bucketOrds == null ? globalOrd : bucketOrds.find(globalOrd);
+ }
- if (acceptedGlobalOrdinals != null) {
- globalOrds = new FilteredOrdinals(globalOrds, acceptedGlobalOrdinals);
+ private void collectGlobalOrd(int doc, long globalOrd, LeafBucketCollector sub) throws IOException {
+ if (bucketOrds == null) {
+ collectExistingBucket(sub, doc, globalOrd);
+ } else {
+ long bucketOrd = bucketOrds.add(globalOrd);
+ if (bucketOrd < 0) {
+ bucketOrd = -1 - bucketOrd;
+ collectExistingBucket(sub, doc, bucketOrd);
+ } else {
+ collectBucket(sub, doc, bucketOrd);
+ }
}
+ }
- return newCollector(globalOrds, sub);
+ private SortedSetDocValues getGlobalOrds(LeafReaderContext ctx) throws IOException {
+ return acceptedGlobalOrdinals == null ?
+ valuesSource.globalOrdinalsValues(ctx) : new FilteredOrdinals(valuesSource.globalOrdinalsValues(ctx), acceptedGlobalOrdinals);
}
- protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
- final LeafBucketCollector sub) {
- grow(ords.getValueCount());
- final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
+ @Override
+ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
+ final SortedSetDocValues globalOrds = getGlobalOrds(ctx);
+ if (bucketOrds == null) {
+ grow(globalOrds.getValueCount());
+ }
+ final SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds);
if (singleValues != null) {
- return new LeafBucketCollectorBase(sub, ords) {
+ return new LeafBucketCollectorBase(sub, globalOrds) {
@Override
public void collect(int doc, long bucket) throws IOException {
assert bucket == 0;
if (singleValues.advanceExact(doc)) {
final int ord = singleValues.ordValue();
- collectExistingBucket(sub, doc, ord);
+ collectGlobalOrd(doc, ord, sub);
}
}
};
} else {
- return new LeafBucketCollectorBase(sub, ords) {
+ return new LeafBucketCollectorBase(sub, globalOrds) {
@Override
public void collect(int doc, long bucket) throws IOException {
assert bucket == 0;
- if (ords.advanceExact(doc)) {
- for (long globalOrd = ords.nextOrd();
- globalOrd != SortedSetDocValues.NO_MORE_ORDS;
- globalOrd = ords.nextOrd()) {
- collectExistingBucket(sub, doc, globalOrd);
+ if (globalOrds.advanceExact(doc)) {
+ for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
+ collectGlobalOrd(doc, globalOrd, sub);
}
}
}
@@ -145,21 +185,21 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
- if (globalOrds == null) { // no context in this reader
+ if (valueCount == 0) { // no context in this reader
return buildEmptyAggregation();
}
final int size;
if (bucketCountThresholds.getMinDocCount() == 0) {
// if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
- size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
+ size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize());
} else {
size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
}
long otherDocCount = 0;
BucketPriorityQueue<OrdBucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
- for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
+ for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
continue;
}
@@ -184,10 +224,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
long survivingBucketOrds[] = new long[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; --i) {
- final OrdBucket bucket = (OrdBucket) ordered.pop();
+ final OrdBucket bucket = ordered.pop();
survivingBucketOrds[i] = bucket.bucketOrd;
BytesRef scratch = new BytesRef();
- copy(globalOrds.lookupOrd(bucket.globalOrd), scratch);
+ copy(lookupGlobalOrd.apply(bucket.globalOrd), scratch);
list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format);
list[i].bucketOrd = bucket.bucketOrd;
otherDocCount -= list[i].docCount;
@@ -254,76 +294,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
}
}
- /**
- * Variant of {@link GlobalOrdinalsStringTermsAggregator} that rebases hashes in order to make them dense. Might be
- * useful in case few hashes are visited.
- */
- public static class WithHash extends GlobalOrdinalsStringTermsAggregator {
-
- private final LongHash bucketOrds;
-
- public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, BucketOrder order,
- DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
- SearchContext context, Aggregator parent, SubAggCollectionMode collectionMode,
- boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
- throws IOException {
- super(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, collectionMode,
- showTermDocCountError, pipelineAggregators, metaData);
- bucketOrds = new LongHash(1, context.bigArrays());
- }
-
- @Override
- protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
- final LeafBucketCollector sub) {
- final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
- if (singleValues != null) {
- return new LeafBucketCollectorBase(sub, ords) {
- @Override
- public void collect(int doc, long bucket) throws IOException {
- if (singleValues.advanceExact(doc)) {
- final int globalOrd = singleValues.ordValue();
- long bucketOrd = bucketOrds.add(globalOrd);
- if (bucketOrd < 0) {
- bucketOrd = -1 - bucketOrd;
- collectExistingBucket(sub, doc, bucketOrd);
- } else {
- collectBucket(sub, doc, bucketOrd);
- }
- }
- }
- };
- } else {
- return new LeafBucketCollectorBase(sub, ords) {
- @Override
- public void collect(int doc, long bucket) throws IOException {
- if (ords.advanceExact(doc)) {
- for (long globalOrd = ords.nextOrd();
- globalOrd != SortedSetDocValues.NO_MORE_ORDS;
- globalOrd = ords.nextOrd()) {
- long bucketOrd = bucketOrds.add(globalOrd);
- if (bucketOrd < 0) {
- bucketOrd = -1 - bucketOrd;
- collectExistingBucket(sub, doc, bucketOrd);
- } else {
- collectBucket(sub, doc, bucketOrd);
- }
- }
- }
- }
- };
- }
- }
-
- @Override
- protected long getBucketOrd(long termOrd) {
- return bucketOrds.find(termOrd);
- }
-
- @Override
- protected void doClose() {
- Releasables.close(bucketOrds);
- }
-
+ @Override
+ protected void doClose() {
+ Releasables.close(bucketOrds);
}
/**
@@ -331,32 +304,44 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
* instead of on the fly for each match.This is beneficial for low cardinality fields, because it can reduce
* the amount of look-ups significantly.
*/
- public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
+ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
private IntArray segmentDocCounts;
-
+ private SortedSetDocValues globalOrds;
private SortedSetDocValues segmentOrds;
- public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
- BucketOrder order, DocValueFormat format,
- BucketCountThresholds bucketCountThresholds, SearchContext context, Aggregator parent,
- SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
- Map<String, Object> metaData) throws IOException {
- super(name, factories, valuesSource, order, format, bucketCountThresholds, null, context, parent, collectionMode,
- showTermDocCountError, pipelineAggregators, metaData);
+ LowCardinality(String name,
+ AggregatorFactories factories,
+ ValuesSource.Bytes.WithOrdinals valuesSource,
+ BucketOrder order,
+ DocValueFormat format,
+ BucketCountThresholds bucketCountThresholds,
+ SearchContext context,
+ Aggregator parent,
+ boolean forceDenseMode,
+ SubAggCollectionMode collectionMode,
+ boolean showTermDocCountError,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+ super(name, factories, valuesSource, order, format, bucketCountThresholds, null,
+ context, parent, forceDenseMode, collectionMode, showTermDocCountError, pipelineAggregators, metaData);
assert factories == null || factories.countAggregators() == 0;
this.segmentDocCounts = context.bigArrays().newIntArray(1, true);
}
- // bucketOrd is ord + 1 to avoid a branch to deal with the missing ord
@Override
- protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
- LeafBucketCollector sub) {
- segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + ords.getValueCount());
+ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
+ final LeafBucketCollector sub) throws IOException {
+ if (segmentOrds != null) {
+ mapSegmentCountsToGlobalCounts();
+ }
+ globalOrds = valuesSource.globalOrdinalsValues(ctx);
+ segmentOrds = valuesSource.ordinalsValues(ctx);
+ segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
- final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
+ final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
if (singleValues != null) {
- return new LeafBucketCollectorBase(sub, ords) {
+ return new LeafBucketCollectorBase(sub, segmentOrds) {
@Override
public void collect(int doc, long bucket) throws IOException {
assert bucket == 0;
@@ -367,14 +352,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
}
};
} else {
- return new LeafBucketCollectorBase(sub, ords) {
+ return new LeafBucketCollectorBase(sub, segmentOrds) {
@Override
public void collect(int doc, long bucket) throws IOException {
assert bucket == 0;
- if (ords.advanceExact(doc)) {
- for (long segmentOrd = ords.nextOrd();
- segmentOrd != SortedSetDocValues.NO_MORE_ORDS;
- segmentOrd = ords.nextOrd()) {
+ if (segmentOrds.advanceExact(doc)) {
+ for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
segmentDocCounts.increment(segmentOrd + 1, 1);
}
}
@@ -384,18 +367,6 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
}
@Override
- public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
- final LeafBucketCollector sub) throws IOException {
- if (segmentOrds != null) {
- mapSegmentCountsToGlobalCounts();
- }
-
- globalOrds = valuesSource.globalOrdinalsValues(ctx);
- segmentOrds = valuesSource.ordinalsValues(ctx);
- return newCollector(segmentOrds, sub);
- }
-
- @Override
protected void doPostCollection() {
if (segmentOrds != null) {
mapSegmentCountsToGlobalCounts();
@@ -426,7 +397,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
}
final long ord = i - 1; // remember we do +1 when counting
final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord);
- incrementBucketDocCount(globalOrd, inc);
+ long bucketOrd = getBucketOrd(globalOrd);
+ incrementBucketDocCount(bucketOrd, inc);
}
}
}
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
index 9a06dfe66f..ca2974a105 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
@@ -53,10 +53,18 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private boolean showTermDocCountError;
- public TermsAggregatorFactory(String name, ValuesSourceConfig<ValuesSource> config, BucketOrder order,
- IncludeExclude includeExclude, String executionHint, SubAggCollectionMode collectMode,
- TermsAggregator.BucketCountThresholds bucketCountThresholds, boolean showTermDocCountError, SearchContext context,
- AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder, Map<String, Object> metaData) throws IOException {
+ public TermsAggregatorFactory(String name,
+ ValuesSourceConfig<ValuesSource> config,
+ BucketOrder order,
+ IncludeExclude includeExclude,
+ String executionHint,
+ SubAggCollectionMode collectMode,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ boolean showTermDocCountError,
+ SearchContext context,
+ AggregatorFactory<?> parent,
+ AggregatorFactories.Builder subFactoriesBuilder,
+ Map<String, Object> metaData) throws IOException {
super(name, config, context, parent, subFactoriesBuilder, metaData);
this.order = order;
this.includeExclude = includeExclude;
@@ -225,14 +233,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
MAP(new ParseField("map")) {
@Override
- Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
- DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
- boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
- throws IOException {
+ Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ BucketOrder order,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext context,
+ Aggregator parent,
+ SubAggCollectionMode subAggCollectMode,
+ boolean showTermDocCountError,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
+
}
@Override
@@ -244,15 +262,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
GLOBAL_ORDINALS(new ParseField("global_ordinals")) {
@Override
- Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
- DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
- boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
- throws IOException {
+ Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ BucketOrder order,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext context, Aggregator parent,
+ SubAggCollectionMode subAggCollectMode,
+ boolean showTermDocCountError,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
- format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError,
+ format, bucketCountThresholds, filter, context, parent, false, subAggCollectMode, showTermDocCountError,
pipelineAggregators, metaData);
+
}
@Override
@@ -264,15 +291,25 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {
@Override
- Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
- DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
- boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
- throws IOException {
+ Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ BucketOrder order,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext context,
+ Aggregator parent,
+ SubAggCollectionMode subAggCollectMode,
+ boolean showTermDocCountError,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
- return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
- order, format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError,
- pipelineAggregators, metaData);
+ return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
+ order, format, bucketCountThresholds, filter, context, parent, true, subAggCollectMode,
+ showTermDocCountError, pipelineAggregators, metaData);
+
}
@Override
@@ -283,21 +320,31 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) {
@Override
- Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
- DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
- boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
- throws IOException {
+ Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ BucketOrder order,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext context,
+ Aggregator parent,
+ SubAggCollectionMode subAggCollectMode,
+ boolean showTermDocCountError,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException {
+
if (includeExclude != null || factories.countAggregators() > 0
- // we need the FieldData impl to be able to extract the
- // segment to global ord mapping
+ // we need the FieldData impl to be able to extract the
+ // segment to global ord mapping
|| valuesSource.getClass() != ValuesSource.Bytes.FieldData.class) {
return GLOBAL_ORDINALS.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude,
context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
}
return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories,
(ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, context, parent,
- subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
+ false, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
+
}
@Override
@@ -321,11 +368,19 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
this.parseField = parseField;
}
- abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
- DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
- SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
- boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
- throws IOException;
+ abstract Aggregator create(String name,
+ AggregatorFactories factories,
+ ValuesSource valuesSource,
+ BucketOrder order,
+ DocValueFormat format,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds,
+ IncludeExclude includeExclude,
+ SearchContext context,
+ Aggregator parent,
+ SubAggCollectionMode subAggCollectMode,
+ boolean showTermDocCountError,
+ List<PipelineAggregator> pipelineAggregators,
+ Map<String, Object> metaData) throws IOException;
abstract boolean needsGlobalOrdinals();
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
index 46e371a3df..dd0785b2d7 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
@@ -233,16 +233,14 @@ public class IncludeExclude implements Writeable, ToXContent {
}
public abstract static class OrdinalsFilter {
- public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
- throws IOException;
+ public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException;
}
class PartitionedOrdinalsFilter extends OrdinalsFilter {
@Override
- public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
- throws IOException {
+ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
final long numOrds = globalOrdinals.getValueCount();
final LongBitSet acceptedGlobalOrdinals = new LongBitSet(numOrds);
final TermsEnum termEnum = globalOrdinals.termsEnum();
@@ -271,8 +269,7 @@ public class IncludeExclude implements Writeable, ToXContent {
*
*/
@Override
- public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
- throws IOException {
+ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
TermsEnum globalTermsEnum;
Terms globalTerms = new DocValuesTerms(globalOrdinals);
@@ -297,8 +294,7 @@ public class IncludeExclude implements Writeable, ToXContent {
}
@Override
- public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
- throws IOException {
+ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
if (includeValues != null) {
for (BytesRef term : includeValues) {