aboutsummaryrefslogtreecommitdiff
path: root/common
diff options
context:
space:
mode:
authorJason Altekruse <altekrusejason@gmail.com>2014-08-05 15:44:42 -0700
committerJacques Nadeau <jacques@apache.org>2014-08-27 18:38:32 -0700
commit929d765afd9da2fb0010a97e90b2ee19f245e37c (patch)
treea55d153a3aec9f9ca98f62330a2ef1c27cd2e64a /common
parentcc25504d1bfeb7ac7bc99a6c3ce5285d30e13697 (diff)
DRILL-1283: JSON project pushdown.
Allows users to avoid reading selected columns of a JSON file, including columns that contain JSON constructs Drill does not currently support. This can be used to query a subset of an existing file while skipping elements that are currently incompatible with Drill, such as schema changes in some columns or nulls in lists. The patch was revised based on Hanifi's review comments and then rebased onto the merge branch.
Diffstat (limited to 'common')
-rw-r--r--common/src/main/java/org/apache/drill/common/expression/PathSegment.java42
-rw-r--r--common/src/main/java/org/apache/drill/common/expression/SchemaPath.java16
2 files changed, 58 insertions, 0 deletions
diff --git a/common/src/main/java/org/apache/drill/common/expression/PathSegment.java b/common/src/main/java/org/apache/drill/common/expression/PathSegment.java
index 0ecfcd05e..c434dc74c 100644
--- a/common/src/main/java/org/apache/drill/common/expression/PathSegment.java
+++ b/common/src/main/java/org/apache/drill/common/expression/PathSegment.java
@@ -239,4 +239,46 @@ public abstract class PathSegment{
} else return child.equals(other.child);
}
+ /**
+ * Check if another path is contained in this one. This is useful for 2 cases. The first
+ * is checking if the other is lower down in the tree, below this path. The other is if
+ * a path is actually contained above the current one.
+ *
+ * Examples:
+ * [a] . contains( [a.b.c] ) returns true
+ * [a.b.c] . contains( [a] ) returns true
+ *
+ * This behavior is used for cases like scanning json in an event based fashion, when we arrive at
+ * a node in a complex type, we will know the complete path back to the root. This method can
+ * be used to determine if we need the data below. This is true in both the cases where the
+ * column requested from the user is below the current node (in which case we may ignore other nodes
+ * further down the tree, while keeping others). This is also the case if the requested path is further
+ * up the tree, if we know we are at position a.b.c and a.b was a requested column, we need to scan
+ * all of the data at and below the current a.b.c node.
+ *
+ * @param otherSeg - path segment to check if it is contained below this one.
+ * @return - is this a match
+ */
+ public boolean contains(PathSegment otherSeg) {
+ if (this == otherSeg)
+ return true;
+ if (otherSeg == null)
+ return false;
+ // TODO - fix this in the future to match array segments are part of the path
+ // the current behavior to always return true when we hit an array may be useful in some cases,
+ // but we can get better performance in the JSON reader if we avoid reading unwanted elements in arrays
+ if (otherSeg.isArray() || this.isArray())
+ return true;
+ if (getClass() != otherSeg.getClass())
+ return false;
+
+ if (!segmentEquals(otherSeg)) {
+ return false;
+ }
+ else if (child == null || otherSeg.child == null) {
+ return true;
+ } else return child.contains(otherSeg.child);
+
+ }
+
}
diff --git a/common/src/main/java/org/apache/drill/common/expression/SchemaPath.java b/common/src/main/java/org/apache/drill/common/expression/SchemaPath.java
index 25ee8b433..9f444e410 100644
--- a/common/src/main/java/org/apache/drill/common/expression/SchemaPath.java
+++ b/common/src/main/java/org/apache/drill/common/expression/SchemaPath.java
@@ -199,6 +199,22 @@ public class SchemaPath extends LogicalExpressionBase {
return rootSegment.equals(other.rootSegment);
}
+ /**
+  * Path-containment check for schema paths; the comparison itself is delegated to
+  * rootSegment.contains(...) on the two root segments.
+  *
+  * @param obj - candidate to compare; null or any object that is not a SchemaPath yields false
+  * @return - true if obj is this very instance, if this path has no root segment, or if the
+  *           root segment reports that it contains the other path's root segment
+  */
+ public boolean contains(Object obj) {
+   if (this == obj) {
+     return true;
+   }
+   // instanceof evaluates to false for null, so this guard rejects null as well.
+   if (!(obj instanceof SchemaPath)) {
+     return false;
+   }
+   final SchemaPath other = (SchemaPath) obj;
+   return rootSegment == null || rootSegment.contains(other.rootSegment);
+ }
+
@Override
public Iterator<LogicalExpression> iterator() {
return Iterators.emptyIterator();