| author | Jinfeng Ni <jni@maprtech.com> | 2014-12-18 18:26:23 -0800 |
|---|---|---|
| committer | Jinfeng Ni <jni@maprtech.com> | 2014-12-22 11:01:12 -0800 |
| commit | 5f70ba1cd17604d2ccb232ae9715629197389c41 | |
| tree | f565e6d7ead9c0a1755b07ec17eed7ec90f2bfd2 | |
| parent | df56954771950cd850ecae10404007d11d9241cb | |
DRILL-1900: Fix numeric overflow problem in hbase stat calculation.
2 files changed, 7 insertions, 4 deletions
```diff
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
index 420fe777b..6d18d1247 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
@@ -352,7 +352,7 @@ public class HBaseGroupScan extends AbstractGroupScan implements DrillHBaseConst
 
   @Override
   public ScanStats getScanStats() {
-    int rowCount = (int) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
+    long rowCount = (long) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
     // the following calculation is not precise since 'columns' could specify CFs while getColsPerRow() returns the number of qualifier.
     float diskCost = scanSizeInBytes * ((columns == null || columns.isEmpty()) ? 1 : columns.size()/statsCalculator.getColsPerRow());
     return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
index 473deeb9c..9c8fbadbe 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
@@ -44,6 +44,8 @@ import org.apache.hadoop.hbase.util.Bytes;
 public class TableStatsCalculator {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TableStatsCalculator.class);
 
+  public static final long DEFAULT_ROW_COUNT = 1024L * 1024L;
+
   private static final String DRILL_EXEC_HBASE_SCAN_SAMPLE_ROWS_COUNT = "drill.exec.hbase.scan.samplerows.count";
 
   private static final int DEFAULT_SAMPLE_SIZE = 100;
@@ -74,7 +76,8 @@ public class TableStatsCalculator {
       scan.setCaching(rowsToSample < DEFAULT_SAMPLE_SIZE ? rowsToSample : DEFAULT_SAMPLE_SIZE);
       scan.setMaxVersions(1);
       ResultScanner scanner = table.getScanner(scan);
-      int rowSizeSum = 0, numColumnsSum = 0, rowCount = 0;
+      long rowSizeSum = 0;
+      int numColumnsSum = 0, rowCount = 0;
       for (; rowCount < rowsToSample; ++rowCount) {
         Result row = scanner.next();
         if (row == null) {
@@ -84,7 +87,7 @@ public class TableStatsCalculator {
         rowSizeSum += row.getBytes().getLength();
       }
       if (rowCount > 0) {
-        avgRowSizeInBytes = rowSizeSum/rowCount;
+        avgRowSizeInBytes = (int) (rowSizeSum/rowCount);
         colsPerRow = numColumnsSum/rowCount;
       }
       scanner.close();
@@ -155,7 +158,7 @@ public class TableStatsCalculator {
    */
   public long getRegionSizeInBytes(byte[] regionId) {
     if (sizeMap == null) {
-      return avgRowSizeInBytes*1024*1024; // 1 million rows
+      return (long) avgRowSizeInBytes * DEFAULT_ROW_COUNT; // 1 million rows
     } else {
       Long size = sizeMap.get(regionId);
       if (size == null) {
```
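The root cause is plain Java `int` arithmetic: every intermediate product stays 32-bit unless something in the expression is widened first. Below is a minimal standalone sketch of the overflow fixed in `getRegionSizeInBytes()`; the class name and the sample row size are illustrative, not from the commit.

```java
public class RegionSizeOverflowDemo {
  public static void main(String[] args) {
    // Assume the sampled average HBase row size is 4 KB.
    int avgRowSizeInBytes = 4096;

    // Pre-patch expression: int * int * int. The product 4096 * 2^20 = 2^32
    // wraps around the 32-bit range and comes out as 0.
    int overflowed = avgRowSizeInBytes * 1024 * 1024;
    System.out.println(overflowed); // prints 0

    // Post-patch expression: the cast widens the arithmetic to 64-bit
    // before multiplying by the long constant (DEFAULT_ROW_COUNT in the patch).
    long estimated = (long) avgRowSizeInBytes * (1024L * 1024L);
    System.out.println(estimated); // prints 4294967296
  }
}
```

Any average row size of 2048 bytes or more pushes the old product past `Integer.MAX_VALUE`, so the region-size estimate (and the scan stats derived from it) silently went to zero or negative; the same reasoning motivates widening `rowCount` in `getScanStats()` and `rowSizeSum` in the sampling loop.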