author | Gautam Parai <gparai@maprtech.com> | 2014-08-21 14:59:53 -0700
committer | Gautam Parai <gparai@maprtech.com> | 2019-02-28 12:01:24 -0800
commit | 469be17597e7b7c6bc1de9863dcb6c5604a55f0c (patch)
tree | 76a1c2572cfb19a75a0f82e6d165db333797fe3b /contrib/storage-jdbc/src
parent | 3233d8aaff57ac71bd3b726efcd5fdaa92aef861 (diff)
DRILL-1328: Support table statistics - Part 2
Add support for avg row-width and major type statistics.
Parallelize the ANALYZE and stats UDF implementations to improve stats collection performance.
Update/fix rowcount, selectivity, and NDV computations to improve plan costing.
Add options for configuring collection/usage of statistics.
Add new APIs and implementation for stats writer (as a precursor to Drill Metastore APIs).
Fix several stats/costing-related issues identified while running TPC-H and TPC-DS queries.
Add support for CPU sampling and nested scalar columns.
Add more test cases for the collection and usage of statistics and fix the remaining unit/functional test failures.
Thanks to Venki Korukanti (@vkorukanti) for the description below (modified to account for new changes). He graciously agreed to rebase the patch to the latest master, fixed a few issues, and added a few tests.
FUNCS: Statistics functions as UDFs:
Separate UDF implementations are provided for each statistic (listed below; a hedged sketch follows the list).
Currently using FieldReader to ensure a consistent output type so that Unpivot doesn't get confused. All stats columns should be Nullable, so that stats functions can return NULL when N/A.
* custom versions of "count" that always return BigInt
* HyperLogLog-based NDV that returns BigInt; works only on VarChars
* HyperLogLog with binary output; works only on VarChars
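To make the shape of these functions concrete, here is a minimal sketch of an HLL-backed NDV aggregate written against Drill's DrillAggFunc UDF API. The function name ndv_sketch, the precision constant, and the use of the ClearSpring HyperLogLog estimator are assumptions for illustration, not the committed implementation.

import org.apache.drill.exec.expr.DrillAggFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.annotations.Workspace;
import org.apache.drill.exec.expr.holders.NullableBigIntHolder;
import org.apache.drill.exec.expr.holders.ObjectHolder;
import org.apache.drill.exec.vector.complex.reader.FieldReader;

// Hypothetical NDV aggregate; name and precision are illustrative.
@FunctionTemplate(name = "ndv_sketch", scope = FunctionScope.POINT_AGGREGATE)
public class NdvVarCharSketch implements DrillAggFunc {
  @Param FieldReader in;            // FieldReader keeps the input view consistent
  @Workspace ObjectHolder sketch;   // carries the HLL estimator across rows
  @Output NullableBigIntHolder out; // Nullable so the function can return NULL when N/A

  public void setup() {
    // 10 bits of HLL precision: an arbitrary value for this sketch
    sketch.obj = new com.clearspring.analytics.stream.cardinality.HyperLogLog(10);
  }

  public void add() {
    if (in.isSet()) {  // skip NULL inputs
      ((com.clearspring.analytics.stream.cardinality.HyperLogLog) sketch.obj)
          .offer(in.readText().toString());
    }
  }

  public void output() {
    out.isSet = 1;
    out.value = ((com.clearspring.analytics.stream.cardinality.HyperLogLog) sketch.obj)
        .cardinality();
  }

  public void reset() {
    sketch.obj = new com.clearspring.analytics.stream.cardinality.HyperLogLog(10);
  }
}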
OPS: Updated protobufs for new ops
OPS: Implemented StatisticsMerge
OPS: Implemented StatisticsUnpivot
ANALYZE: AnalyzeTable functionality (a usage sketch follows the bullets below)
* JavaCC syntax more-or-less copied from LucidDB.
* (Basic) AnalyzePrule: DrillAnalyzeRel -> UnpivotPrel StatsMergePrel FilterPrel (for sampling) StatsAggPrel ScanPrel
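For a sense of how this surfaces to users, a hedged usage sketch issuing the new statement through plain JDBC. The driver URL, the option name planner.statistics.use, the table path, and the sampling percentage are assumptions for the example, not quotes from this patch.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class AnalyzeTableExample {
  public static void main(String[] args) throws Exception {
    // Connect to a local drillbit via Drill's JDBC driver (URL assumed)
    try (Connection conn = DriverManager.getConnection("jdbc:drill:drillbit=localhost");
         Statement stmt = conn.createStatement()) {
      // Option name assumed; the patch adds options for configuring
      // collection/usage of statistics
      stmt.execute("ALTER SESSION SET `planner.statistics.use` = true");
      // Collect statistics; the SAMPLE clause exercises the new CPU sampling
      stmt.execute("ANALYZE TABLE dfs.tmp.`lineitem` COMPUTE STATISTICS SAMPLE 20 PERCENT");
    }
  }
}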
ANALYZE: Add getMetadataTable() to AbstractSchema
USAGE: Change field access in QueryWrapper
USAGE: Add getDrillTable() to DrillScanRelBase and ScanPrel
* Since ScanPrel does not inherit from DrillScanRelBase, this requires adding a DrillTable to the constructor.
* This is done so that a custom ReflectiveRelMetadataProvider can access the DrillTable associated with Logical/Physical scans (a hedged sketch follows below).
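A minimal sketch of that Calcite pattern, assuming the getDrillTable() accessor this commit adds; the handler class name and the fallback estimate are illustrative, not the commit's actual provider.

import org.apache.calcite.rel.metadata.BuiltInMetadata;
import org.apache.calcite.rel.metadata.MetadataDef;
import org.apache.calcite.rel.metadata.MetadataHandler;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.drill.exec.planner.common.DrillScanRelBase;

// Hypothetical row-count handler wired up as a reflective metadata source.
public class StatsRowCountHandler implements MetadataHandler<BuiltInMetadata.RowCount> {

  public static final RelMetadataProvider SOURCE =
      ReflectiveRelMetadataProvider.reflectiveSource(
          BuiltInMethod.ROW_COUNT.method, new StatsRowCountHandler());

  @Override
  public MetadataDef<BuiltInMetadata.RowCount> getDef() {
    return BuiltInMetadata.RowCount.DEF;
  }

  // Calcite dispatches here reflectively for scans; the DrillTable (and,
  // through it, any attached DrillStatsTable) is now reachable.
  public Double getRowCount(DrillScanRelBase rel, RelMetadataQuery mq) {
    org.apache.drill.exec.planner.logical.DrillTable table = rel.getDrillTable();
    // ... consult the table's statistics here; fall back to the scan's estimate
    return rel.estimateRowCount(mq);
  }
}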
USAGE: Attach DrillStatsTable to DrillTable.
* DrillStatsTable represents the data scanned from a corresponding ".stats.drill" table
* In order to avoid doing query execution right after the ".stats.drill" table is found, metadata is not actually collected until the MaterializationVisitor is used.
** Currently, the metadata source must be a string (so that a SQL query can be created). Doing this with a table is probably more complicated.
** Query is set up to extract only the most recent statistics results for each column.
closes #729
Diffstat (limited to 'contrib/storage-jdbc/src')
4 files changed, 22 insertions, 12 deletions
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java
index a98193939..199d922ba 100644
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcGroupScan.java
@@ -20,6 +20,7 @@ package org.apache.drill.exec.store.jdbc;
 import java.util.List;
 
 import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.logical.StoragePluginConfig;
 import org.apache.drill.exec.physical.base.AbstractGroupScan;
 import org.apache.drill.exec.physical.base.PhysicalOperator;
@@ -38,14 +39,14 @@ import com.fasterxml.jackson.annotation.JsonTypeName;
 public class JdbcGroupScan extends AbstractGroupScan {
 
   private final String sql;
-  private final List<String> columns;
+  private final List<SchemaPath> columns;
   private final JdbcStoragePlugin plugin;
   private final double rows;
 
   @JsonCreator
   public JdbcGroupScan(
       @JsonProperty("sql") String sql,
-      @JsonProperty("columns") List<String> columns,
+      @JsonProperty("columns") List<SchemaPath> columns,
       @JsonProperty("config") StoragePluginConfig config,
       @JsonProperty("rows") double rows,
       @JacksonInject StoragePluginRegistry plugins) throws ExecutionSetupException {
@@ -56,7 +57,7 @@ public class JdbcGroupScan extends AbstractGroupScan {
     this.rows = rows;
   }
 
-  JdbcGroupScan(String sql, List<String> columns, JdbcStoragePlugin plugin, double rows) {
+  JdbcGroupScan(String sql, List<SchemaPath> columns, JdbcStoragePlugin plugin, double rows) {
     super("");
     this.sql = sql;
     this.columns = columns;
@@ -91,7 +92,7 @@ public class JdbcGroupScan extends AbstractGroupScan {
     return sql;
   }
 
-  public List<String> getColumns() {
+  public List<SchemaPath> getColumns() {
     return columns;
   }
 
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java
index b8229402b..85f88a872 100644
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcPrel.java
@@ -17,9 +17,11 @@
  */
 package org.apache.drill.exec.store.jdbc;
 
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.List;
 
 import org.apache.calcite.adapter.java.JavaTypeFactory;
 import org.apache.calcite.adapter.jdbc.JdbcImplementor;
 import org.apache.calcite.plan.ConventionTraitDef;
@@ -32,6 +34,7 @@ import org.apache.calcite.rel.RelShuttleImpl;
 import org.apache.calcite.rel.RelWriter;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.sql.SqlDialect;
+import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.physical.base.PhysicalOperator;
 import org.apache.drill.exec.planner.physical.PhysicalPlanCreator;
 import org.apache.drill.exec.planner.physical.Prel;
@@ -91,7 +94,11 @@ public class JdbcPrel extends AbstractRelNode implements Prel {
 
   @Override
   public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) {
-    JdbcGroupScan output = new JdbcGroupScan(sql, rowType.getFieldNames(), convention.getPlugin(), rows);
+    List<SchemaPath> columns = new ArrayList<>();
+    for (String col : rowType.getFieldNames()) {
+      columns.add(SchemaPath.getSimplePath(col));
+    }
+    JdbcGroupScan output = new JdbcGroupScan(sql, columns, convention.getPlugin(), rows);
     return creator.addMetadata(this, output);
   }
 
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java
index 011c9bc58..5c6def26a 100755
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcRecordReader.java
@@ -35,6 +35,7 @@ import javax.sql.DataSource;
 
 import org.apache.drill.common.AutoCloseables;
 import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.TypeProtos.MajorType;
 import org.apache.drill.common.types.TypeProtos.MinorType;
@@ -75,9 +76,9 @@ class JdbcRecordReader extends AbstractRecordReader {
   private final String sql;
   private ImmutableList<ValueVector> vectors;
   private ImmutableList<Copier<?>> copiers;
-  private final List<String> columns;
+  private final List<SchemaPath> columns;
 
-  public JdbcRecordReader(DataSource source, String sql, String storagePluginName, List<String> columns) {
+  public JdbcRecordReader(DataSource source, String sql, String storagePluginName, List<SchemaPath> columns) {
     this.source = source;
     this.sql = sql;
     this.storagePluginName = storagePluginName;
@@ -206,7 +207,7 @@ class JdbcRecordReader extends AbstractRecordReader {
       ImmutableList.Builder<Copier<?>> copierBuilder = ImmutableList.builder();
 
       for (int i = 1; i <= columnsCount; i++) {
-        String name = columns.get(i - 1);
+        String name = columns.get(i - 1).getRootSegmentPath();
         // column index in ResultSetMetaData starts from 1
         int jdbcType = meta.getColumnType(i);
         int width = meta.getPrecision(i);
diff --git a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java
index 9bc6de891..c9d5f0daf 100755
--- a/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java
+++ b/contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcSubScan.java
@@ -18,6 +18,7 @@ package org.apache.drill.exec.store.jdbc;
 
 import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.logical.StoragePluginConfig;
 import org.apache.drill.exec.physical.base.AbstractSubScan;
 import org.apache.drill.exec.proto.beans.CoreOperatorType;
 
@@ -36,12 +37,12 @@ public class JdbcSubScan extends AbstractSubScan {
 
   private final String sql;
   private final JdbcStoragePlugin plugin;
-  private final List<String> columns;
+  private final List<SchemaPath> columns;
 
   @JsonCreator
   public JdbcSubScan(
       @JsonProperty("sql") String sql,
-      @JsonProperty("columns") List<String> columns,
+      @JsonProperty("columns") List<SchemaPath> columns,
       @JsonProperty("config") StoragePluginConfig config,
       @JacksonInject StoragePluginRegistry plugins) throws ExecutionSetupException {
     super("");
@@ -50,7 +51,7 @@ public class JdbcSubScan extends AbstractSubScan {
     this.plugin = (JdbcStoragePlugin) plugins.getPlugin(config);
   }
 
-  JdbcSubScan(String sql, List<String> columns, JdbcStoragePlugin plugin) {
+  JdbcSubScan(String sql, List<SchemaPath> columns, JdbcStoragePlugin plugin) {
     super("");
     this.sql = sql;
     this.columns = columns;
@@ -66,7 +67,7 @@ public class JdbcSubScan extends AbstractSubScan {
     return sql;
   }
 
-  public List<String> getColumns() {
+  public List<SchemaPath> getColumns() {
     return columns;
   }
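Taken together, these hunks wrap plain column names into SchemaPath objects on the way into JdbcGroupScan (in JdbcPrel) and unwrap them again via getRootSegmentPath() when matching ResultSet columns by name (in JdbcRecordReader). A minimal standalone sketch of that round trip, using only the SchemaPath calls that appear in the diff; the column names are made up:

import java.util.ArrayList;
import java.util.List;

import org.apache.drill.common.expression.SchemaPath;

public class SchemaPathRoundTrip {
  public static void main(String[] args) {
    // Wrap plain column names into SchemaPaths, as JdbcPrel now does
    List<SchemaPath> columns = new ArrayList<>();
    for (String col : new String[] {"l_orderkey", "l_quantity"}) {
      columns.add(SchemaPath.getSimplePath(col));
    }
    // Unwrap them again, as JdbcRecordReader now does for each ResultSet column
    for (SchemaPath column : columns) {
      System.out.println(column.getRootSegmentPath());
    }
  }
}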