about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jinfeng Ni <jni@maprtech.com> 2014-12-18 18:26:23 -0800
committer: Jinfeng Ni <jni@maprtech.com> 2014-12-22 11:01:12 -0800
commit: 5f70ba1cd17604d2ccb232ae9715629197389c41 (patch)
tree: f565e6d7ead9c0a1755b07ec17eed7ec90f2bfd2
parent: df56954771950cd850ecae10404007d11d9241cb (diff)
DRILL-1900: Fix numeric overflow problem in hbase stat calculation.
-rw-r--r-- contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java | 2
-rw-r--r-- contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java | 9
2 files changed, 7 insertions, 4 deletions
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
index 420fe777b..6d18d1247 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
@@ -352,7 +352,7 @@ public class HBaseGroupScan extends AbstractGroupScan implements DrillHBaseConst
@Override
public ScanStats getScanStats() {
- int rowCount = (int) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
+ long rowCount = (long) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
// the following calculation is not precise since 'columns' could specify CFs while getColsPerRow() returns the number of qualifier.
float diskCost = scanSizeInBytes * ((columns == null || columns.isEmpty()) ? 1 : columns.size()/statsCalculator.getColsPerRow());
return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
index 473deeb9c..9c8fbadbe 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
@@ -44,6 +44,8 @@ import org.apache.hadoop.hbase.util.Bytes;
public class TableStatsCalculator {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TableStatsCalculator.class);
+ public static final long DEFAULT_ROW_COUNT = 1024L * 1024L;
+
private static final String DRILL_EXEC_HBASE_SCAN_SAMPLE_ROWS_COUNT = "drill.exec.hbase.scan.samplerows.count";
private static final int DEFAULT_SAMPLE_SIZE = 100;
@@ -74,7 +76,8 @@ public class TableStatsCalculator {
scan.setCaching(rowsToSample < DEFAULT_SAMPLE_SIZE ? rowsToSample : DEFAULT_SAMPLE_SIZE);
scan.setMaxVersions(1);
ResultScanner scanner = table.getScanner(scan);
- int rowSizeSum = 0, numColumnsSum = 0, rowCount = 0;
+ long rowSizeSum = 0;
+ int numColumnsSum = 0, rowCount = 0;
for (; rowCount < rowsToSample; ++rowCount) {
Result row = scanner.next();
if (row == null) {
@@ -84,7 +87,7 @@ public class TableStatsCalculator {
rowSizeSum += row.getBytes().getLength();
}
if (rowCount > 0) {
- avgRowSizeInBytes = rowSizeSum/rowCount;
+ avgRowSizeInBytes = (int) (rowSizeSum/rowCount);
colsPerRow = numColumnsSum/rowCount;
}
scanner.close();
@@ -155,7 +158,7 @@ public class TableStatsCalculator {
*/
public long getRegionSizeInBytes(byte[] regionId) {
if (sizeMap == null) {
- return avgRowSizeInBytes*1024*1024; // 1 million rows
+ return (long) avgRowSizeInBytes * DEFAULT_ROW_COUNT; // 1 million rows
} else {
Long size = sizeMap.get(regionId);
if (size == null) {