Added tests for toXContent and fromXContent for IncludeExclude class.

The new REST test revealed an issue with inconsistent hashing in partitioned term tests, which is also fixed in this change. Closes #22102
author: markharwood <markharwood@gmail.com> 2016-12-12 09:52:15 +0000
committer: markharwood <markharwood@gmail.com> 2016-12-13 15:23:09 +0000
commit: 4c6d17a176014fad1bdbe5ed04968455f9435094 (patch)
tree: 237c8056fcc1b17daaf82cd585d3581dde8ba8bd /core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
parent: 28397c9594fba75af3104525fcdd4a17281e5e83 (diff)
1 files changed, 7 insertions, 3 deletions
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
index ea4797780c..725a8c437c 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
@@ -29,6 +29,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongBitSet;
 import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
@@ -49,7 +50,6 @@ import org.elasticsearch.index.query.QueryParseContext;
 import org.elasticsearch.search.DocValueFormat;
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import java.util.HashSet;
 import java.util.Objects;
 import java.util.Set;
@@ -66,6 +66,10 @@ public class IncludeExclude implements Writeable, ToXContent {
     public static final ParseField PATTERN_FIELD = new ParseField("pattern");
     public static final ParseField PARTITION_FIELD = new ParseField("partition");
     public static final ParseField NUM_PARTITIONS_FIELD = new ParseField("num_partitions");
+    // Needed to add this seed for a deterministic term hashing policy
+    // otherwise tests fail to get expected results and worse, shards 
+    // can disagree on which terms hash to the required partition. 
+    private static final int HASH_PARTITIONING_SEED = 31;
 
     // for parsing purposes only
     // TODO: move all aggs to the same package so that this stuff could be pkg-private
@@ -196,7 +200,7 @@ public class IncludeExclude implements Writeable, ToXContent {
     class PartitionedStringFilter extends StringFilter {
         @Override
         public boolean accept(BytesRef value) {
-            return Math.floorMod(value.hashCode(), incNumPartitions) == incZeroBasedPartition;
+            return Math.floorMod(StringHelper.murmurhash3_x86_32(value, HASH_PARTITIONING_SEED), incNumPartitions) == incZeroBasedPartition;
         }
     }
 
@@ -252,7 +256,7 @@ public class IncludeExclude implements Writeable, ToXContent {
 
             BytesRef term = termEnum.next();
             while (term != null) {
-                if (Math.floorMod(term.hashCode(), incNumPartitions) == incZeroBasedPartition) {
+                if (Math.floorMod(StringHelper.murmurhash3_x86_32(term, HASH_PARTITIONING_SEED), incNumPartitions) == incZeroBasedPartition) {
                     acceptedGlobalOrdinals.set(termEnum.ord());
                 }
                 term = termEnum.next();
author	markharwood <markharwood@gmail.com>	2016-12-12 09:52:15 +0000
committer	markharwood <markharwood@gmail.com>	2016-12-13 15:23:09 +0000
commit	4c6d17a176014fad1bdbe5ed04968455f9435094 (patch)
tree	237c8056fcc1b17daaf82cd585d3581dde8ba8bd /core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java
parent	28397c9594fba75af3104525fcdd4a17281e5e83 (diff)