aboutsummaryrefslogtreecommitdiff
path: root/exec/vector
diff options
context:
space:
mode:
authorSorabh Hamirwasia <shamirwasia@maprtech.com>2018-05-15 14:27:31 -0700
committerArina Ielchiieva <arina.yelchiyeva@gmail.com>2018-05-19 22:33:01 +0300
commitb7d259ba9c8c2b28700c9da33bb97dd79ef04cbc (patch)
treeda0304ed423aa4485eb22d7c8467ab974f319441 /exec/vector
parentdc1db98f613f041e40c2c704862051ee2a51734a (diff)
DRILL-6418: Handle Schema change in Unnest And Lateral for unnest field / non-unnest field
Note: Changed Lateral to handle non-empty right batch with OK_NEW_SCHEMA closes #1271
Diffstat (limited to 'exec/vector')
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java43
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java3
2 files changed, 38 insertions, 8 deletions
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
index fa4d2767e..672bb7e0c 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
@@ -111,7 +111,7 @@ public class MaterializedField {
* <p>
* By allowing the non-critical metadata to change, we preserve the
* child relationships as a list or union evolves.
- * @param type
+ * @param newType
*/
public void replaceType(MajorType newType) {
@@ -190,11 +190,20 @@ public class MaterializedField {
return Objects.hash(this.name, this.type, this.children);
}
+ /**
+ * Equals method doesn't check for the children list of fields here. When a batch is sent over network then it is
+ * serialized along with the Materialized Field which also contains information about the internal vectors like
+ * offset and bits. While deserializing, these vectors are treated as children of parent vector. If a operator on
+ * receiver side like Sort receives a schema in buildSchema phase and then later on receives another batch, that
+ * will result in schema change and query will fail. This is because second batch schema will contain information
+ * about internal vectors like offset and bits which will not be present in first batch schema. For ref: See
+ * TestSort#testSortWithRepeatedMapWithExchanges
+ *
+ * @param obj
+ * @return
+ */
@Override
public boolean equals(Object obj) {
- if (this == obj) {
- return true;
- }
if (obj == null) {
return false;
}
@@ -206,7 +215,7 @@ public class MaterializedField {
// in MapVector$MapTransferPair
return this.name.equalsIgnoreCase(other.name) &&
- Objects.equals(this.type, other.type);
+ Objects.equals(this.type, other.type);
}
/**
@@ -230,6 +239,27 @@ public class MaterializedField {
* sense.) Operators that want to reconcile two maps that differ only in
* column order need a different comparison.</li>
* </ul>
+ * <ul>
+ * Note: Materialized Field and ValueVector has 1:1 mapping which means for each ValueVector there is a materialized
+ * field associated with it. So when we replace or add a ValueVector in a VectorContainer then we create new
+ * Materialized Field object for the new vector. This works fine for Primitive type ValueVectors but for ValueVector
+ * which are of type {@link org.apache.drill.exec.vector.complex.AbstractContainerVector} there is some differences on
+ * how Materialized field and ValueVector objects are updated inside the container which both ValueVector and
+ * Materialized Field object both mutable.
+ * <p>
+ * For example: For cases of MapVector it can so happen that only the children field type changed but
+ * the parent Map type and name remained same. In these cases we replace the children field ValueVector from parent
+ * MapVector inside main batch container, with new type of vector. Thus the reference of parent MaprVector inside
+ * batch container remains same but the reference of children field ValueVector stored inside MapVector get's updated.
+ * During this update it also replaces the Materialized field for that children field which is stored in childrens
+ * list of the parent MapVector Materialized Field.
+ * Since the children list of parent Materialized Field is updated, this make this class mutable. Hence there should
+ * not be any check for object reference equality here but instead there should be deep comparison which is what
+ * this method is now performing. Since if we have object reference check then in above cases it will return true for
+ * 2 Materialized Field object whose children field list is different which is not correct. Same holds true for
+ * {@link MaterializedField#isEquivalent(MaterializedField)} method.
+ * </p>
+ * </ul>
*
* @param other another field
* @return <tt>true</tt> if the columns are identical according to the
@@ -237,9 +267,6 @@ public class MaterializedField {
*/
public boolean isEquivalent(MaterializedField other) {
- if (this == other) {
- return true;
- }
if (! name.equalsIgnoreCase(other.name)) {
return false;
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
index 36823970d..1d0e03be3 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
@@ -129,6 +129,9 @@ public abstract class AbstractMapVector extends AbstractContainerVector {
return (T) existing;
} else if (nullFilled(existing)) {
existing.clear();
+ // Since it's removing old vector and adding new one based on new type, it should do same for Materialized field,
+ // Otherwise there will be duplicate of same field with same name but different type.
+ field.removeChild(existing.getField());
create = true;
}
if (create) {