| author | Jinfeng Ni <jni@maprtech.com> | 2014-12-18 18:26:23 -0800 |
|---|---|---|
| committer | Jinfeng Ni <jni@maprtech.com> | 2014-12-22 11:01:12 -0800 |
| commit | 5f70ba1cd17604d2ccb232ae9715629197389c41 | |
| tree | f565e6d7ead9c0a1755b07ec17eed7ec90f2bfd2 | |
| parent | df56954771950cd850ecae10404007d11d9241cb | |
DRILL-1900: Fix numeric overflow problem in hbase stat calculation.
2 files changed, 7 insertions, 4 deletions
```diff
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
index 420fe777b..6d18d1247 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
@@ -352,7 +352,7 @@ public class HBaseGroupScan extends AbstractGroupScan implements DrillHBaseConst
 
   @Override
   public ScanStats getScanStats() {
-    int rowCount = (int) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
+    long rowCount = (long) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
     // the following calculation is not precise since 'columns' could specify CFs while getColsPerRow() returns the number of qualifier.
     float diskCost = scanSizeInBytes * ((columns == null || columns.isEmpty()) ? 1 : columns.size()/statsCalculator.getColsPerRow());
     return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
index 473deeb9c..9c8fbadbe 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
@@ -44,6 +44,8 @@ import org.apache.hadoop.hbase.util.Bytes;
 public class TableStatsCalculator {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TableStatsCalculator.class);
 
+  public static final long DEFAULT_ROW_COUNT = 1024L * 1024L;
+
   private static final String DRILL_EXEC_HBASE_SCAN_SAMPLE_ROWS_COUNT = "drill.exec.hbase.scan.samplerows.count";
 
   private static final int DEFAULT_SAMPLE_SIZE = 100;
@@ -74,7 +76,8 @@ public class TableStatsCalculator {
       scan.setCaching(rowsToSample < DEFAULT_SAMPLE_SIZE ? rowsToSample : DEFAULT_SAMPLE_SIZE);
       scan.setMaxVersions(1);
       ResultScanner scanner = table.getScanner(scan);
-      int rowSizeSum = 0, numColumnsSum = 0, rowCount = 0;
+      long rowSizeSum = 0;
+      int numColumnsSum = 0, rowCount = 0;
       for (; rowCount < rowsToSample; ++rowCount) {
         Result row = scanner.next();
         if (row == null) {
@@ -84,7 +87,7 @@ public class TableStatsCalculator {
         rowSizeSum += row.getBytes().getLength();
       }
       if (rowCount > 0) {
-        avgRowSizeInBytes = rowSizeSum/rowCount;
+        avgRowSizeInBytes = (int) (rowSizeSum/rowCount);
         colsPerRow = numColumnsSum/rowCount;
       }
       scanner.close();
@@ -155,7 +158,7 @@ public class TableStatsCalculator {
    */
   public long getRegionSizeInBytes(byte[] regionId) {
     if (sizeMap == null) {
-      return avgRowSizeInBytes*1024*1024; // 1 million rows
+      return (long) avgRowSizeInBytes * DEFAULT_ROW_COUNT; // 1 million rows
     } else {
       Long size = sizeMap.get(regionId);
       if (size == null) {
```
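The root cause is plain Java `int` arithmetic: every intermediate product stays 32-bit unless something in the expression is widened first. Below is a minimal standalone sketch of the overflow fixed in `getRegionSizeInBytes()`; the class name and the sample row size are illustrative, not from the commit.

```java
public class RegionSizeOverflowDemo {
  public static void main(String[] args) {
    // Assume the sampled average HBase row size is 4 KB.
    int avgRowSizeInBytes = 4096;

    // Pre-patch expression: int * int * int. The product 4096 * 2^20 = 2^32
    // wraps around the 32-bit range and comes out as 0.
    int overflowed = avgRowSizeInBytes * 1024 * 1024;
    System.out.println(overflowed); // prints 0

    // Post-patch expression: the cast widens the arithmetic to 64-bit
    // before multiplying by the long constant (DEFAULT_ROW_COUNT in the patch).
    long estimated = (long) avgRowSizeInBytes * (1024L * 1024L);
    System.out.println(estimated); // prints 4294967296
  }
}
```

Any average row size of 2048 bytes or more pushes the old product past `Integer.MAX_VALUE`, so the region-size estimate (and the scan stats derived from it) silently went to zero or negative; the same reasoning motivates widening `rowCount` in `getScanStats()` and `rowSizeSum` in the sampling loop.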