aboutsummaryrefslogtreecommitdiff
path: root/contrib/storage-jdbc/src
diff options
context:
space:
mode:
authorGautam Parai <gparai@maprtech.com>2014-08-21 14:59:53 -0700
committerGautam Parai <gparai@maprtech.com>2019-02-28 12:01:24 -0800
commit469be17597e7b7c6bc1de9863dcb6c5604a55f0c (patch)
tree76a1c2572cfb19a75a0f82e6d165db333797fe3b /contrib/storage-jdbc/src
parent3233d8aaff57ac71bd3b726efcd5fdaa92aef861 (diff)
DRILL-1328: Support table statistics - Part 2
Add support for avg row-width and major type statistics. Parallelize the ANALYZE implementation and stats UDF implementation to improve stats collection performance. Update/fix rowcount, selectivity and ndv computations to improve plan costing. Add options for configuring collection/usage of statistics. Add new APIs and implementation for stats writer (as a precursor to Drill Metastore APIs). Fix several stats/costing related issues identified while running TPC-H and TPC-DS queries. Add support for CPU sampling and nested scalar columns. Add more testcases for collection and usage of statistics and fix remaining unit/functional test failures. Thanks to Venki Korukanti (@vkorukanti) for the description below (modified to account for new changes). He graciously agreed to rebase the patch to latest master, fixed a few issues and added a few tests. FUNCS: Statistics functions as UDFs: Separate Currently using FieldReader to ensure consistent output type so that Unpivot doesn't get confused. All stats columns should be Nullable, so that stats functions can return NULL when N/A. * custom versions of "count" that always return BigInt * HyperLogLog based NDV that returns BigInt that works only on VarChars * HyperLogLog with binary output that only works on VarChars OPS: Updated protobufs for new ops OPS: Implemented StatisticsMerge OPS: Implemented StatisticsUnpivot ANALYZE: AnalyzeTable functionality * JavaCC syntax more-or-less copied from LucidDB. * (Basic) AnalyzePrule: DrillAnalyzeRel -> UnpivotPrel StatsMergePrel FilterPrel(for sampling) StatsAggPrel ScanPrel ANALYZE: Add getMetadataTable() to AbstractSchema USAGE: Change field access in QueryWrapper USAGE: Add getDrillTable() to DrillScanRelBase and ScanPrel * since ScanPrel does not inherit from DrillScanRelBase, this requires adding a DrillTable to the constructor * This is done so that a custom ReflectiveRelMetadataProvider can access the DrillTable associated with Logical/Physical scans.
USAGE: Attach DrillStatsTable to DrillTable. * DrillStatsTable represents the data scanned from a corresponding ".stats.drill" table * In order to avoid doing query execution right after the ".stats.drill" table is found, metadata is not actually collected until the MaterializationVisitor is used. ** Currently, the metadata source must be a string (so that a SQL query can be created). Doing this with a table is probably more complicated. ** Query is set up to extract only the most recent statistics results for each column. closes #729
Diffstat (limited to 'contrib/storage-jdbc/src')
-rw-r--r--contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java9
-rw-r--r--contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java9
-rwxr-xr-xcontrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java7
-rwxr-xr-xcontrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java9
4 files changed, 22 insertions, 12 deletions
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java
index a98193939..199d922ba 100644
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java
@@ -20,6 +20,7 @@ package org.apache.drill.exec.store.jdbc;
import java.util.List;
import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
import org.apache.drill.exec.physical.base.PhysicalOperator;
@@ -38,14 +39,14 @@ import com.fasterxml.jackson.annotation.JsonTypeName;
public class JdbcGroupScan extends AbstractGroupScan {
private final String sql;
- private final List<String> columns;
+ private final List<SchemaPath> columns;
private final JdbcStoragePlugin plugin;
private final double rows;
@JsonCreator
public JdbcGroupScan(
@JsonProperty("sql") String sql,
- @JsonProperty("columns") List<String> columns,
+ @JsonProperty("columns") List<SchemaPath> columns,
@JsonProperty("config") StoragePluginConfig config,
@JsonProperty("rows") double rows,
@JacksonInject StoragePluginRegistry plugins) throws ExecutionSetupException {
@@ -56,7 +57,7 @@ public class JdbcGroupScan extends AbstractGroupScan {
this.rows = rows;
}
- JdbcGroupScan(String sql, List<String> columns, JdbcStoragePlugin plugin, double rows) {
+ JdbcGroupScan(String sql, List<SchemaPath> columns, JdbcStoragePlugin plugin, double rows) {
super("");
this.sql = sql;
this.columns = columns;
@@ -91,7 +92,7 @@ public class JdbcGroupScan extends AbstractGroupScan {
return sql;
}
- public List<String> getColumns() {
+ public List<SchemaPath> getColumns() {
return columns;
}
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java
index b8229402b..85f88a872 100644
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java
@@ -17,9 +17,11 @@
*/
package org.apache.drill.exec.store.jdbc;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
+import java.util.List;
import org.apache.calcite.adapter.java.JavaTypeFactory;
import org.apache.calcite.adapter.jdbc.JdbcImplementor;
import org.apache.calcite.plan.ConventionTraitDef;
@@ -32,6 +34,7 @@ import org.apache.calcite.rel.RelShuttleImpl;
import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.sql.SqlDialect;
+import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.planner.physical.PhysicalPlanCreator;
import org.apache.drill.exec.planner.physical.Prel;
@@ -91,7 +94,11 @@ public class JdbcPrel extends AbstractRelNode implements Prel {
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) {
- JdbcGroupScan output = new JdbcGroupScan(sql, rowType.getFieldNames(), convention.getPlugin(), rows);
+ List<SchemaPath> columns = new ArrayList<>();
+ for (String col : rowType.getFieldNames()) {
+ columns.add(SchemaPath.getSimplePath(col));
+ }
+ JdbcGroupScan output = new JdbcGroupScan(sql, columns, convention.getPlugin(), rows);
return creator.addMetadata(this, output);
}
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java
index 011c9bc58..5c6def26a 100755
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java
@@ -35,6 +35,7 @@ import javax.sql.DataSource;
import org.apache.drill.common.AutoCloseables;
import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
@@ -75,9 +76,9 @@ class JdbcRecordReader extends AbstractRecordReader {
private final String sql;
private ImmutableList<ValueVector> vectors;
private ImmutableList<Copier<?>> copiers;
- private final List<String> columns;
+ private final List<SchemaPath> columns;
- public JdbcRecordReader(DataSource source, String sql, String storagePluginName, List<String> columns) {
+ public JdbcRecordReader(DataSource source, String sql, String storagePluginName, List<SchemaPath> columns) {
this.source = source;
this.sql = sql;
this.storagePluginName = storagePluginName;
@@ -206,7 +207,7 @@ class JdbcRecordReader extends AbstractRecordReader {
ImmutableList.Builder<Copier<?>> copierBuilder = ImmutableList.builder();
for (int i = 1; i <= columnsCount; i++) {
- String name = columns.get(i - 1);
+ String name = columns.get(i - 1).getRootSegmentPath();
// column index in ResultSetMetaData starts from 1
int jdbcType = meta.getColumnType(i);
int width = meta.getPrecision(i);
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java
index 9bc6de891..c9d5f0daf 100755
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java
@@ -18,6 +18,7 @@
package org.apache.drill.exec.store.jdbc;
import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.physical.base.AbstractSubScan;
import org.apache.drill.exec.proto.beans.CoreOperatorType;
@@ -36,12 +37,12 @@ public class JdbcSubScan extends AbstractSubScan {
private final String sql;
private final JdbcStoragePlugin plugin;
- private final List<String> columns;
+ private final List<SchemaPath> columns;
@JsonCreator
public JdbcSubScan(
@JsonProperty("sql") String sql,
- @JsonProperty("columns") List<String> columns,
+ @JsonProperty("columns") List<SchemaPath> columns,
@JsonProperty("config") StoragePluginConfig config,
@JacksonInject StoragePluginRegistry plugins) throws ExecutionSetupException {
super("");
@@ -50,7 +51,7 @@ public class JdbcSubScan extends AbstractSubScan {
this.plugin = (JdbcStoragePlugin) plugins.getPlugin(config);
}
- JdbcSubScan(String sql, List<String> columns, JdbcStoragePlugin plugin) {
+ JdbcSubScan(String sql, List<SchemaPath> columns, JdbcStoragePlugin plugin) {
super("");
this.sql = sql;
this.columns = columns;
@@ -66,7 +67,7 @@ public class JdbcSubScan extends AbstractSubScan {
return sql;
}
- public List<String> getColumns() {
+ public List<SchemaPath> getColumns() {
return columns;
}