aboutsummaryrefslogtreecommitdiff
path: root/exec/vector
diff options
context:
space:
mode:
author Dave Oshinsky <daveoshinsky@yahoo.com> 2016-02-09 17:37:47 -0500
committer Volodymyr Vysotskyi <vvovyk@gmail.com> 2018-05-04 20:30:50 +0300
commit 79e27eadb86dfaa0e2d8bc514f3069bf02dc2762 (patch)
tree ed49b2eb2d0cf3a31901088db29784581b99c844 /exec/vector
parent 24193b1b038a6315681a65c76a67034b64f71fc5 (diff)
DRILL-4184: Support variable length decimal fields in parquet
Diffstat (limited to 'exec/vector')
-rw-r--r-- exec/vector/src/main/codegen/data/ValueVectorTypes.tdd | 9
-rw-r--r-- exec/vector/src/main/codegen/templates/BaseWriter.java | 1
-rw-r--r-- exec/vector/src/main/codegen/templates/ColumnAccessors.java | 30
-rw-r--r-- exec/vector/src/main/codegen/templates/ComplexWriters.java | 8
-rw-r--r-- exec/vector/src/main/codegen/templates/HolderReaderImpl.java | 4
-rw-r--r-- exec/vector/src/main/codegen/templates/RepeatedValueVectors.java | 8
-rw-r--r-- exec/vector/src/main/codegen/templates/UnionReader.java | 4
-rw-r--r-- exec/vector/src/main/codegen/templates/ValueHolders.java | 17
-rw-r--r-- exec/vector/src/main/codegen/templates/VariableLengthVectors.java | 9
-rw-r--r-- exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java | 72
-rw-r--r-- exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java | 6
11 files changed, 138 insertions, 30 deletions
diff --git a/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd b/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd
index 4d719b4f5..ca4653d3e 100644
--- a/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd
+++ b/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd
@@ -178,6 +178,15 @@
]
},
{
+ major: "VarLen",
+ width: 4,
+ javaType: "int",
+ boxedType: "DrillBuf",
+ minor: [
+ { class: "VarDecimal", friendlyType: "BigDecimal", fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "DrillBuf"}, {name: "scale", type: "int", include: false}] }
+ ]
+ },
+ {
major: "Bit",
width: 1,
javaType: "int",
diff --git a/exec/vector/src/main/codegen/templates/BaseWriter.java b/exec/vector/src/main/codegen/templates/BaseWriter.java
index c65a75809..ad9c44e1f 100644
--- a/exec/vector/src/main/codegen/templates/BaseWriter.java
+++ b/exec/vector/src/main/codegen/templates/BaseWriter.java
@@ -111,6 +111,7 @@ package org.apache.drill.exec.vector.complex.writer;
UInt8Writer uInt8(String name);
VarCharWriter varChar(String name);
Var16CharWriter var16Char(String name);
+ VarDecimalWriter varDecimal(String name);
TinyIntWriter tinyInt(String name);
SmallIntWriter smallInt(String name);
IntWriter integer(String name);
diff --git a/exec/vector/src/main/codegen/templates/ColumnAccessors.java b/exec/vector/src/main/codegen/templates/ColumnAccessors.java
index d0a2ace6f..6068afa22 100644
--- a/exec/vector/src/main/codegen/templates/ColumnAccessors.java
+++ b/exec/vector/src/main/codegen/templates/ColumnAccessors.java
@@ -112,9 +112,9 @@ public class ColumnAccessors {
<#if accessorType=="BigDecimal">
<#assign label="Decimal">
</#if>
- <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" />
+ <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" || drillType == "VarDecimal"/>
<#assign decimal = drillType == "Decimal9" || drillType == "Decimal18" ||
- drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse" />
+ drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse" || drillType == "VarDecimal"/>
<#if varWidth>
<#assign accessorType = "byte[]">
<#assign label = "Bytes">
@@ -135,15 +135,15 @@ public class ColumnAccessors {
<#if varWidth>
public static class ${drillType}ColumnReader extends BaseVarWidthReader {
-
+
<#else>
public static class ${drillType}ColumnReader extends BaseFixedWidthReader {
-
+
private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH;
<#if decimal>
private MajorType type;
-
+
</#if>
</#if>
<#if decimal>
@@ -190,12 +190,12 @@ public class ColumnAccessors {
<#elseif drillType == "IntervalDay">
final int offset = ${getOffset};
return DateUtilities.fromIntervalDay(
- buf.getInt(offset),
+ buf.getInt(offset),
buf.getInt(offset + ${minor.millisecondsOffset}));
<#elseif drillType == "Interval">
final int offset = ${getOffset};
return DateUtilities.fromInterval(
- buf.getInt(offset),
+ buf.getInt(offset),
buf.getInt(offset + ${minor.daysOffset}),
buf.getInt(offset + ${minor.millisecondsOffset}));
<#elseif drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
@@ -241,9 +241,9 @@ public class ColumnAccessors {
public static class ${drillType}ColumnWriter extends BaseVarWidthWriter {
<#else>
public static class ${drillType}ColumnWriter extends BaseFixedWidthWriter {
-
+
private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH;
-
+
<#if decimal>
private MajorType type;
</#if>
@@ -272,7 +272,17 @@ public class ColumnAccessors {
</#if>
@Override
+ <#if drillType = "VarDecimal">
+ public final void setDecimal(final BigDecimal bd) {
+   // Only the unscaled integer is written, as the byte array produced by
+   // BigInteger.toByteArray(); bd.scale() is not stored by this method.
+   final byte[] unscaled = bd.unscaledValue().toByteArray();
+   setBytes(unscaled, unscaled.length);
+ }
+
+ public final void setBytes(final byte[] value, int len) {
+ <#else>
public final void set${label}(final ${accessorType} value${putArgs}) {
+ </#if>
<#-- Must compute the write offset first; can't be inline because the
writeOffset() function has a side effect of possibly changing the buffer
address (bufAddr). -->
@@ -355,7 +365,7 @@ import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader;
import org.apache.drill.exec.vector.accessor.writer.BaseScalarWriter;
public class ColumnAccessorUtils {
-
+
private ColumnAccessorUtils() { }
<@build vv.types "Required" "Reader" />
diff --git a/exec/vector/src/main/codegen/templates/ComplexWriters.java b/exec/vector/src/main/codegen/templates/ComplexWriters.java
index cfa049d12..6e1d8f3e2 100644
--- a/exec/vector/src/main/codegen/templates/ComplexWriters.java
+++ b/exec/vector/src/main/codegen/templates/ComplexWriters.java
@@ -98,7 +98,11 @@ public class ${eName}WriterImpl extends AbstractFieldWriter {
<#if !(minor.class == "Decimal9" || minor.class == "Decimal18" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense")>
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ <#if minor.class == "VarDecimal">
+ <#-- The signature above lists every field, but the mutator call below stops at the
+      field named "scale": the list is broken when "scale" is reached and the ", "
+      separator is suppressed for the field that immediately precedes it. -->
+ mutator.addSafe(idx(), <#list fields as field><#if field.name == "scale"><#break></#if>${field.name}<#if field_has_next && fields[field_index+1].name != "scale" >, </#if></#list>);
+ <#else>
mutator.addSafe(idx(), <#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ </#if>
vector.getMutator().setValueCount(idx()+1);
}
</#if>
@@ -123,7 +127,11 @@ public class ${eName}WriterImpl extends AbstractFieldWriter {
<#if !(minor.class == "Decimal9" || minor.class == "Decimal18" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense")>
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ <#if minor.class == "VarDecimal">
+ <#-- The signature above lists every field, but the mutator call below stops at the
+      field named "scale": the list is broken when "scale" is reached and the ", "
+      separator is suppressed for the field that immediately precedes it. In
+      "Nullable" mode a literal 1 is passed right after the index. -->
+ mutator.setSafe(idx(), <#if mode == "Nullable">1, </#if><#list fields as field><#if field.name == "scale"><#break></#if>${field.name}<#if field_has_next && fields[field_index+1].name != "scale" >, </#if></#list>);
+ <#else>
mutator.setSafe(idx(), <#if mode == "Nullable">1, </#if><#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ </#if>
vector.getMutator().setValueCount(idx()+1);
}
diff --git a/exec/vector/src/main/codegen/templates/HolderReaderImpl.java b/exec/vector/src/main/codegen/templates/HolderReaderImpl.java
index 0eca723ce..e46989ba5 100644
--- a/exec/vector/src/main/codegen/templates/HolderReaderImpl.java
+++ b/exec/vector/src/main/codegen/templates/HolderReaderImpl.java
@@ -160,6 +160,8 @@ public class ${holderMode}${name}HolderReaderImpl extends AbstractFieldReader {
<#if minor.class == "VarBinary">
return value;
+<#elseif minor.class == "VarDecimal">
+ return org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromDrillBuf(holder.buffer, holder.start, holder.end-holder.start, holder.scale);
<#elseif minor.class == "Var16Char">
return new String(value);
<#elseif minor.class == "VarChar">
@@ -233,6 +235,8 @@ public class ${holderMode}${name}HolderReaderImpl extends AbstractFieldReader {
<#if minor.class == "VarBinary">
return value;
+<#elseif minor.class == "VarDecimal">
+ return org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromDrillBuf(holder.buffer, holder.start, holder.end-holder.start, holder.scale);
<#elseif minor.class == "Var16Char">
return new String(value);
<#elseif minor.class == "VarChar">
diff --git a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java
index 412f498c2..2b2b6bd9a 100644
--- a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java
+++ b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java
@@ -405,11 +405,19 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
}
<#if (fields?size > 1) && !(minor.class == "Decimal9" || minor.class == "Decimal18" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense")>
+ <#if minor.class == "VarDecimal">
+ <#-- VarDecimal variant of addSafe: both the parameter list and the forwarded argument
+      list stop at the field named "scale" (the list is broken there and the ", "
+      separator before it is suppressed), so "scale" is neither accepted nor passed on. -->
+ public void addSafe(int arrayIndex, <#list fields as field><#if field.name == "scale"><#break></#if>${field.type} ${field.name}<#if field_has_next && fields[field_index+1].name != "scale" >, </#if></#list>) {
+ // Read the offset currently stored at arrayIndex+1 and use it as the slot to write in the values vector.
+ int nextOffset = offsets.getAccessor().get(arrayIndex+1);
+ values.getMutator().setSafe(nextOffset, <#list fields as field><#if field.name == "scale"><#break></#if>${field.name}<#if field_has_next && fields[field_index+1].name != "scale">, </#if></#list>);
+ // Store the incremented offset back at arrayIndex+1.
+ offsets.getMutator().setSafe(arrayIndex+1, nextOffset+1);
+ }
+ <#else>
public void addSafe(int rowIndex, <#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
final int nextOffset = offsets.getAccessor().get(rowIndex+1);
values.getMutator().setSafe(nextOffset, <#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
offsets.getMutator().setSafe(rowIndex+1, nextOffset+1);
}
+ </#if>
</#if>
<#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse">
diff --git a/exec/vector/src/main/codegen/templates/UnionReader.java b/exec/vector/src/main/codegen/templates/UnionReader.java
index 40ad89b82..54276f573 100644
--- a/exec/vector/src/main/codegen/templates/UnionReader.java
+++ b/exec/vector/src/main/codegen/templates/UnionReader.java
@@ -34,14 +34,14 @@ package org.apache.drill.exec.vector.complex.impl;
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {
- private BaseReader[] readers = new BaseReader[43];
+ private BaseReader[] readers = new BaseReader[44];
public UnionVector data;
public UnionReader(UnionVector data) {
this.data = data;
}
- private static MajorType[] TYPES = new MajorType[43];
+ private static MajorType[] TYPES = new MajorType[44];
static {
for (MinorType minorType : MinorType.values()) {
diff --git a/exec/vector/src/main/codegen/templates/ValueHolders.java b/exec/vector/src/main/codegen/templates/ValueHolders.java
index 9982bd419..d5b4342df 100644
--- a/exec/vector/src/main/codegen/templates/ValueHolders.java
+++ b/exec/vector/src/main/codegen/templates/ValueHolders.java
@@ -90,9 +90,20 @@ public final class ${className} implements ValueHolder{
public static boolean getSign(int start, DrillBuf buffer) {
return ((buffer.getInt(start) & 0x80000000) != 0);
}
- </#if></#if>
- public MajorType getType() {return TYPE;}
+ /**
+  * Decodes this holder's value by passing its {@code buffer}, {@code start},
+  * {@code nDecimalDigits} and {@code scale} to
+  * {@code DecimalUtility.getBigDecimalFromSparse}.
+  *
+  * @return the decoded decimal value
+  */
+ public java.math.BigDecimal getBigDecimal() {
+   return org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromSparse(buffer, start, nDecimalDigits, scale);
+ }
+ </#if></#if>
+
+ <#if minor.class.startsWith("VarDecimal")>
+ /**
+  * Decodes this holder's value by passing its {@code buffer}, the byte range
+  * {@code [start, end)} (as a start offset and a length of {@code end - start})
+  * and {@code scale} to {@code DecimalUtility.getBigDecimalFromDrillBuf}.
+  *
+  * @return the decoded decimal value
+  */
+ public java.math.BigDecimal getBigDecimal() {
+   return org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromDrillBuf(buffer, start, end - start, scale);
+ }
+ </#if>
@Deprecated
public int hashCode(){
@@ -116,4 +127,4 @@ public final class ${className} implements ValueHolder{
</#list>
</#list>
-</#list> \ No newline at end of file
+</#list>
diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
index 876d6880d..8ab4c3a48 100644
--- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
+++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
@@ -474,6 +474,15 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
<#switch minor.class>
+ <#case "VarDecimal">
+ @Override
+ public ${friendlyType} getObject(int index) {
+ byte[] b = get(index);
+ BigInteger bi = b.length == 0 ? new BigInteger("0") : new BigInteger(b);
+ BigDecimal bd = new BigDecimal(bi, getField().getScale());
+ return bd;
+ }
+ <#break>
<#case "VarChar">
@Override
public ${friendlyType} getObject(int index) {
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java b/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java
index 914d68dee..82809547d 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java
@@ -152,18 +152,18 @@ public class DecimalUtility extends CoreDecimalUtility{
return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, false);
}
- public static BigDecimal getBigDecimalFromSparse(DrillBuf data, int startIndex, int nDecimalDigits, int scale) {
+ public static BigDecimal getBigDecimalFromSparse(DrillBuf data, int startIndex, int nDecimalDigits, int scale) {
- // In the sparse representation we pad the scale with zeroes for ease of arithmetic, need to truncate
- return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, true);
- }
+ // In the sparse representation we pad the scale with zeroes for ease of arithmetic, need to truncate
+ return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, true);
+ }
- public static BigDecimal getBigDecimalFromDrillBuf(DrillBuf bytebuf, int start, int length, int scale) {
- byte[] value = new byte[length];
- bytebuf.getBytes(start, value, 0, length);
- BigInteger unscaledValue = new BigInteger(value);
- return new BigDecimal(unscaledValue, scale);
- }
+ /**
+  * Builds a BigDecimal from {@code length} bytes of a Drill buffer, treating the bytes
+  * as the unscaled value accepted by {@link BigInteger#BigInteger(byte[])}.
+  * A zero-length value yields zero at the requested scale, matching how the
+  * VarDecimal vector's {@code getObject} treats an empty byte array.
+  *
+  * @param bytebuf buffer holding the unscaled value
+  * @param start   offset of the first byte to read
+  * @param length  number of bytes to read
+  * @param scale   scale applied to the unscaled value
+  * @return the decoded decimal
+  */
+ public static BigDecimal getBigDecimalFromDrillBuf(DrillBuf bytebuf, int start, int length, int scale) {
+   if (length == 0) {
+     // BigInteger(byte[]) throws NumberFormatException for a zero-length array.
+     return new BigDecimal(BigInteger.ZERO, scale);
+   }
+   byte[] value = new byte[length];
+   bytebuf.getBytes(start, value, 0, length);
+   BigInteger unscaledValue = new BigInteger(value);
+   return new BigDecimal(unscaledValue, scale);
+ }
public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int start, int length, int scale) {
byte[] value = new byte[length];
@@ -355,11 +355,29 @@ public class DecimalUtility extends CoreDecimalUtility{
scale -= MAX_DIGITS;
}
- // Set the negative sign
- if (sign == true) {
- data.setInt(startIndex, data.getInt(startIndex) | 0x80000000);
+ // Set the negative sign
+ if (sign == true) {
+ data.setInt(startIndex, data.getInt(startIndex) | 0x80000000);
+ }
+
+ }
+
+ /**
+  * Writes a BigDecimal into a buffer in the variable-width "VarDecimal" form, i.e. the
+  * byte array of its unscaled value. The scale itself is not written: whatever owns
+  * {@code data} is assumed to already carry a scale equal to {@code input.scale()}.
+  *
+  * @param input decimal number to store
+  * @param data destination buffer that receives the unscaled-value bytes
+  * @param startIndex index in {@code data} at which the first byte is written
+  * @return {@code startIndex} plus the number of bytes written, i.e. the next free index in {@code data}
+  */
+ public static int getVarDecimalFromBigDecimal(BigDecimal input, ByteBuf data, int startIndex) {
+   final byte[] unscaledBytes = input.unscaledValue().toByteArray();
+   data.setBytes(startIndex, unscaledBytes);
+   return startIndex + unscaledBytes.length;
+ }
- }
public static long getDecimal18FromBigDecimal(BigDecimal input, int scale, int precision) {
// Truncate or pad to set the input to the correct scale
@@ -433,7 +451,31 @@ public class DecimalUtility extends CoreDecimalUtility{
buffer.setInt(start + (index * 4), value);
}
- public static int compareSparseBytes(DrillBuf left, int leftStart, boolean leftSign, int leftScale, int leftPrecision, DrillBuf right, int rightStart, boolean rightSign, int rightPrecision, int rightScale, int width, int nDecimalDigits, boolean absCompare) {
+ /**
+  * Compares two VarDecimal values that are still stored in their Drill buffers.
+  * Both values are decoded to BigDecimal and compared with {@code BigDecimal.compareTo},
+  * so values that are numerically equal but have different scales (e.g. 2.00 and 2)
+  * compare as equal.
+  *
+  * @param left Drill buffer holding the left value
+  * @param leftStart start offset of the left value
+  * @param leftEnd end offset of the left value
+  * @param leftScale scale of the left value
+  * @param right Drill buffer holding the right value
+  * @param rightStart start offset of the right value
+  * @param rightEnd end offset of the right value
+  * @param rightScale scale of the right value
+  * @param absCompare when true, the absolute values are compared instead of the signed values
+  * @return 1 if left is greater than right, 0 if they are equal, -1 if left is less than right
+  */
+ public static int compareVarLenBytes(DrillBuf left, int leftStart, int leftEnd, int leftScale, DrillBuf right, int rightStart, int rightEnd, int rightScale, boolean absCompare) {
+   final java.math.BigDecimal lhs = getBigDecimalFromDrillBuf(left, leftStart, leftEnd - leftStart, leftScale);
+   final java.math.BigDecimal rhs = getBigDecimalFromDrillBuf(right, rightStart, rightEnd - rightStart, rightScale);
+   return absCompare ? lhs.abs().compareTo(rhs.abs()) : lhs.compareTo(rhs);
+ }
+
+ public static int compareSparseBytes(DrillBuf left, int leftStart, boolean leftSign, int leftScale, int leftPrecision, DrillBuf right, int rightStart, boolean rightSign, int rightPrecision, int rightScale, int width, int nDecimalDigits, boolean absCompare) {
int invert = 1;
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java
index b074abd17..1fa785748 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java
@@ -28,6 +28,7 @@ import org.apache.drill.exec.vector.complex.writer.Decimal28SparseWriter;
import org.apache.drill.exec.vector.complex.writer.Decimal38DenseWriter;
import org.apache.drill.exec.vector.complex.writer.Decimal38SparseWriter;
import org.apache.drill.exec.vector.complex.writer.Decimal9Writer;
+import org.apache.drill.exec.vector.complex.writer.VarDecimalWriter;
import org.apache.drill.exec.vector.complex.writer.Float4Writer;
import org.apache.drill.exec.vector.complex.writer.Float8Writer;
import org.apache.drill.exec.vector.complex.writer.IntWriter;
@@ -223,6 +224,11 @@ public class MapOrListWriterImpl implements MapOrListWriter {
}
@Override
+ public VarDecimalWriter varDecimal(String name) {
+   // Delegate to the map writer (by field name) when one is set; otherwise use the list writer, which takes no name.
+   if (map != null) {
+     return map.varDecimal(name);
+   }
+   return list.varDecimal();
+ }
+
+ @Override
public Decimal38SparseWriter decimal38Sparse(String name) {
return (map != null) ? map.decimal38Sparse(name) : list.decimal38Sparse();
}