aboutsummaryrefslogtreecommitdiff
path: root/contrib/native/client/src
diff options
context:
space:
mode:
authorGautam Parai <gparai@maprtech.com>2014-08-21 14:59:53 -0700
committerGautam Parai <gparai@maprtech.com>2019-02-28 12:01:24 -0800
commit469be17597e7b7c6bc1de9863dcb6c5604a55f0c (patch)
tree76a1c2572cfb19a75a0f82e6d165db333797fe3b /contrib/native/client/src
parent3233d8aaff57ac71bd3b726efcd5fdaa92aef861 (diff)
DRILL-1328: Support table statistics - Part 2
Add support for avg row-width and major type statistics. Parallelize the ANALYZE implementation and stats UDF implementation to improve stats collection performance. Update/fix rowcount, selectivity and ndv computations to improve plan costing. Add options for configuring collection/usage of statistics. Add new APIs and implementation for stats writer (as a precursor to Drill Metastore APIs). Fix several stats/costing related issues identified while running TPC-H and TPC-DS queries. Add support for CPU sampling and nested scalar columns. Add more testcases for collection and usage of statistics and fix remaining unit/functional test failures. Thanks to Venki Korukanti (@vkorukanti) for the description below (modified to account for new changes). He graciously agreed to rebase the patch to latest master, fixed a few issues and added a few tests. FUNCS: Statistics functions as UDFs: Separate Currently using FieldReader to ensure consistent output type so that Unpivot doesn't get confused. All stats columns should be Nullable, so that stats functions can return NULL when N/A. * custom versions of "count" that always return BigInt * HyperLogLog based NDV that returns BigInt that works only on VarChars * HyperLogLog with binary output that only works on VarChars OPS: Updated protobufs for new ops OPS: Implemented StatisticsMerge OPS: Implemented StatisticsUnpivot ANALYZE: AnalyzeTable functionality * JavaCC syntax more-or-less copied from LucidDB. * (Basic) AnalyzePrule: DrillAnalyzeRel -> UnpivotPrel StatsMergePrel FilterPrel(for sampling) StatsAggPrel ScanPrel ANALYZE: Add getMetadataTable() to AbstractSchema USAGE: Change field access in QueryWrapper USAGE: Add getDrillTable() to DrillScanRelBase and ScanPrel * since ScanPrel does not inherit from DrillScanRelBase, this requires adding a DrillTable to the constructor * This is done so that a custom ReflectiveRelMetadataProvider can access the DrillTable associated with Logical/Physical scans. 
USAGE: Attach DrillStatsTable to DrillTable. * DrillStatsTable represents the data scanned from a corresponding ".stats.drill" table * In order to avoid doing query execution right after the ".stats.drill" table is found, metadata is not actually collected until the MaterializationVisitor is used. ** Currently, the metadata source must be a string (so that a SQL query can be created). Doing this with a table is probably more complicated. ** Query is set up to extract only the most recent statistics results for each column. closes #729
Diffstat (limited to 'contrib/native/client/src')
-rw-r--r--contrib/native/client/src/protobuf/UserBitShared.pb.cc15
-rw-r--r--contrib/native/client/src/protobuf/UserBitShared.pb.h7
2 files changed, 15 insertions, 7 deletions
diff --git a/contrib/native/client/src/protobuf/UserBitShared.pb.cc b/contrib/native/client/src/protobuf/UserBitShared.pb.cc
index 1e0712091..0db64d348 100644
--- a/contrib/native/client/src/protobuf/UserBitShared.pb.cc
+++ b/contrib/native/client/src/protobuf/UserBitShared.pb.cc
@@ -754,7 +754,7 @@ void protobuf_AddDesc_UserBitShared_2eproto() {
"entState\022\013\n\007SENDING\020\000\022\027\n\023AWAITING_ALLOCA"
"TION\020\001\022\013\n\007RUNNING\020\002\022\014\n\010FINISHED\020\003\022\r\n\tCAN"
"CELLED\020\004\022\n\n\006FAILED\020\005\022\032\n\026CANCELLATION_REQ"
- "UESTED\020\006*\247\t\n\020CoreOperatorType\022\021\n\rSINGLE_"
+ "UESTED\020\006*\351\t\n\020CoreOperatorType\022\021\n\rSINGLE_"
"SENDER\020\000\022\024\n\020BROADCAST_SENDER\020\001\022\n\n\006FILTER"
"\020\002\022\022\n\016HASH_AGGREGATE\020\003\022\r\n\tHASH_JOIN\020\004\022\016\n"
"\nMERGE_JOIN\020\005\022\031\n\025HASH_PARTITION_SENDER\020\006"
@@ -784,10 +784,12 @@ void protobuf_AddDesc_UserBitShared_2eproto() {
"\025\n\021SEQUENCE_SUB_SCAN\0205\022\023\n\017PARTITION_LIMI"
"T\0206\022\023\n\017PCAPNG_SUB_SCAN\0207\022\022\n\016RUNTIME_FILT"
"ER\0208\022\017\n\013ROWKEY_JOIN\0209\022\023\n\017SYSLOG_SUB_SCAN"
- "\020:*g\n\nSaslStatus\022\020\n\014SASL_UNKNOWN\020\000\022\016\n\nSA"
- "SL_START\020\001\022\024\n\020SASL_IN_PROGRESS\020\002\022\020\n\014SASL"
- "_SUCCESS\020\003\022\017\n\013SASL_FAILED\020\004B.\n\033org.apach"
- "e.drill.exec.protoB\rUserBitSharedH\001", 5555);
+ "\020:\022\030\n\024STATISTICS_AGGREGATE\020;\022\020\n\014UNPIVOT_"
+ "MAPS\020<\022\024\n\020STATISTICS_MERGE\020=*g\n\nSaslStat"
+ "us\022\020\n\014SASL_UNKNOWN\020\000\022\016\n\nSASL_START\020\001\022\024\n\020"
+ "SASL_IN_PROGRESS\020\002\022\020\n\014SASL_SUCCESS\020\003\022\017\n\013"
+ "SASL_FAILED\020\004B.\n\033org.apache.drill.exec.p"
+ "rotoB\rUserBitSharedH\001", 5621);
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
"UserBitShared.proto", &protobuf_RegisterTypes);
UserCredentials::default_instance_ = new UserCredentials();
@@ -967,6 +969,9 @@ bool CoreOperatorType_IsValid(int value) {
case 56:
case 57:
case 58:
+ case 59:
+ case 60:
+ case 61:
return true;
default:
return false;
diff --git a/contrib/native/client/src/protobuf/UserBitShared.pb.h b/contrib/native/client/src/protobuf/UserBitShared.pb.h
index b95b311c8..a8e6ccba6 100644
--- a/contrib/native/client/src/protobuf/UserBitShared.pb.h
+++ b/contrib/native/client/src/protobuf/UserBitShared.pb.h
@@ -262,11 +262,14 @@ enum CoreOperatorType {
PCAPNG_SUB_SCAN = 55,
RUNTIME_FILTER = 56,
ROWKEY_JOIN = 57,
- SYSLOG_SUB_SCAN = 58
+ SYSLOG_SUB_SCAN = 58,
+ STATISTICS_AGGREGATE = 59,
+ UNPIVOT_MAPS = 60,
+ STATISTICS_MERGE = 61
};
bool CoreOperatorType_IsValid(int value);
const CoreOperatorType CoreOperatorType_MIN = SINGLE_SENDER;
-const CoreOperatorType CoreOperatorType_MAX = SYSLOG_SUB_SCAN;
+const CoreOperatorType CoreOperatorType_MAX = STATISTICS_MERGE;
const int CoreOperatorType_ARRAYSIZE = CoreOperatorType_MAX + 1;
const ::google::protobuf::EnumDescriptor* CoreOperatorType_descriptor();