DRILL-6418: Handle Schema change in Unnest And Lateral for unnest field / non-unnest field

Note: Changed Lateral to handle non-empty right batch with OK_NEW_SCHEMA closes #1271
author: Sorabh Hamirwasia <shamirwasia@maprtech.com> 2018-05-15 14:27:31 -0700
committer: Arina Ielchiieva <arina.yelchiyeva@gmail.com> 2018-05-19 22:33:01 +0300
commit: b7d259ba9c8c2b28700c9da33bb97dd79ef04cbc (patch)
tree: da0304ed423aa4485eb22d7c8467ab974f319441 /exec/vector
parent: dc1db98f613f041e40c2c704862051ee2a51734a (diff)
2 files changed, 38 insertions, 8 deletions
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
index fa4d2767e..672bb7e0c 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
@@ -111,7 +111,7 @@ public class MaterializedField {
    * <p>
    * By allowing the non-critical metadata to change, we preserve the
    * child relationships as a list or union evolves.
-   * @param type
+   * @param newType
    */
 
   public void replaceType(MajorType newType) {
@@ -190,11 +190,20 @@ public class MaterializedField {
     return Objects.hash(this.name, this.type, this.children);
   }
 
+  /**
+   * Equals method doesn't check for the children list of fields here. When a batch is sent over network then it is
+   * serialized along with the Materialized Field which also contains information about the internal vectors like
+   * offset and bits. While deserializing, these vectors are treated as children of parent vector. If a operator on
+   * receiver side like Sort receives a schema in buildSchema phase and then later on receives another batch, that
+   * will result in schema change and query will fail. This is because second batch schema will contain information
+   * about internal vectors like offset and bits which will not be present in first batch schema. For ref: See
+   * TestSort#testSortWithRepeatedMapWithExchanges
+   *
+   * @param obj
+   * @return
+   */
   @Override
   public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
     if (obj == null) {
       return false;
     }
@@ -206,7 +215,7 @@ public class MaterializedField {
     // in MapVector$MapTransferPair
 
     return this.name.equalsIgnoreCase(other.name) &&
-            Objects.equals(this.type, other.type);
+      Objects.equals(this.type, other.type);
   }
 
   /**
@@ -230,6 +239,27 @@ public class MaterializedField {
    * sense.) Operators that want to reconcile two maps that differ only in
    * column order need a different comparison.</li>
    * </ul>
+   * <ul>
+   * Note: Materialized Field and ValueVector has 1:1 mapping which means for each ValueVector there is a materialized
+   * field associated with it. So when we replace or add a ValueVector in a VectorContainer then we create new
+   * Materialized Field object for the new vector. This works fine for Primitive type ValueVectors but for ValueVector
+   * which are of type {@link org.apache.drill.exec.vector.complex.AbstractContainerVector} there is some differences on
+   * how Materialized field and ValueVector objects are updated inside the container which both ValueVector and
+   * Materialized Field object both mutable.
+   * <p>
+   * For example: For cases of MapVector it can so happen that only the children field type changed but
+   * the parent Map type and name remained same. In these cases we replace the children field ValueVector from parent
+   * MapVector inside main batch container, with new type of vector. Thus the reference of parent MaprVector inside
+   * batch container remains same but the reference of children field ValueVector stored inside MapVector get's updated.
+   * During this update it also replaces the Materialized field for that children field which is stored in childrens
+   * list of the parent MapVector Materialized Field.
+   * Since the children list of parent Materialized Field is updated, this make this class mutable. Hence there should
+   * not be any check for object reference equality here but instead there should be deep comparison which is what
+   * this method is now performing. Since if we have object reference check then in above cases it will return true for
+   * 2 Materialized Field object whose children field list is different which is not correct. Same holds true for
+   * {@link MaterializedField#isEquivalent(MaterializedField)} method.
+   * </p>
+   * </ul>
    *
    * @param other another field
    * @return <tt>true</tt> if the columns are identical according to the
@@ -237,9 +267,6 @@ public class MaterializedField {
    */
 
   public boolean isEquivalent(MaterializedField other) {
-    if (this == other) {
-      return true;
-    }
     if (! name.equalsIgnoreCase(other.name)) {
       return false;
     }
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
index 36823970d..1d0e03be3 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
@@ -129,6 +129,9 @@ public abstract class AbstractMapVector extends AbstractContainerVector {
       return (T) existing;
     } else if (nullFilled(existing)) {
       existing.clear();
+      // Since it's removing old vector and adding new one based on new type, it should do same for Materialized field,
+      // Otherwise there will be duplicate of same field with same name but different type.
+      field.removeChild(existing.getField());
       create = true;
     }
     if (create) {
author	Sorabh Hamirwasia <shamirwasia@maprtech.com>	2018-05-15 14:27:31 -0700
committer	Arina Ielchiieva <arina.yelchiyeva@gmail.com>	2018-05-19 22:33:01 +0300
commit	b7d259ba9c8c2b28700c9da33bb97dd79ef04cbc (patch)
tree	da0304ed423aa4485eb22d7c8467ab974f319441 /exec/vector
parent	dc1db98f613f041e40c2c704862051ee2a51734a (diff)