diff options
author | Dave Oshinsky <daveoshinsky@yahoo.com> | 2016-02-09 17:37:47 -0500 |
---|---|---|
committer | Volodymyr Vysotskyi <vvovyk@gmail.com> | 2018-05-04 20:30:50 +0300 |
commit | 79e27eadb86dfaa0e2d8bc514f3069bf02dc2762 (patch) | |
tree | ed49b2eb2d0cf3a31901088db29784581b99c844 /exec/vector | |
parent | 24193b1b038a6315681a65c76a67034b64f71fc5 (diff) |
DRILL-4184: Support variable length decimal fields in parquet
Diffstat (limited to 'exec/vector')
11 files changed, 138 insertions, 30 deletions
diff --git a/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd b/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd index 4d719b4f5..ca4653d3e 100644 --- a/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ b/exec/vector/src/main/codegen/data/ValueVectorTypes.tdd @@ -178,6 +178,15 @@ ] }, { + major: "VarLen", + width: 4, + javaType: "int", + boxedType: "DrillBuf", + minor: [ + { class: "VarDecimal", friendlyType: "BigDecimal", fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "DrillBuf"}, {name: "scale", type: "int", include: false}] } + ] + }, + { major: "Bit", width: 1, javaType: "int", diff --git a/exec/vector/src/main/codegen/templates/BaseWriter.java b/exec/vector/src/main/codegen/templates/BaseWriter.java index c65a75809..ad9c44e1f 100644 --- a/exec/vector/src/main/codegen/templates/BaseWriter.java +++ b/exec/vector/src/main/codegen/templates/BaseWriter.java @@ -111,6 +111,7 @@ package org.apache.drill.exec.vector.complex.writer; UInt8Writer uInt8(String name); VarCharWriter varChar(String name); Var16CharWriter var16Char(String name); + VarDecimalWriter varDecimal(String name); TinyIntWriter tinyInt(String name); SmallIntWriter smallInt(String name); IntWriter integer(String name); diff --git a/exec/vector/src/main/codegen/templates/ColumnAccessors.java b/exec/vector/src/main/codegen/templates/ColumnAccessors.java index d0a2ace6f..6068afa22 100644 --- a/exec/vector/src/main/codegen/templates/ColumnAccessors.java +++ b/exec/vector/src/main/codegen/templates/ColumnAccessors.java @@ -112,9 +112,9 @@ public class ColumnAccessors { <#if accessorType=="BigDecimal"> <#assign label="Decimal"> </#if> - <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" /> + <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" || drillType == "VarDecimal"/> <#assign decimal = drillType == "Decimal9" || drillType == "Decimal18" || - drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse" /> + drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse" || drillType == "VarDecimal"/> <#if varWidth> <#assign accessorType = "byte[]"> <#assign label = "Bytes"> @@ -135,15 +135,15 @@ public class ColumnAccessors { <#if varWidth> public static class ${drillType}ColumnReader extends BaseVarWidthReader { - + <#else> public static class ${drillType}ColumnReader extends BaseFixedWidthReader { - + private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH; <#if decimal> private MajorType type; - + </#if> </#if> <#if decimal> @@ -190,12 +190,12 @@ public class ColumnAccessors { <#elseif drillType == "IntervalDay"> final int offset = ${getOffset}; return DateUtilities.fromIntervalDay( - buf.getInt(offset), + buf.getInt(offset), buf.getInt(offset + ${minor.millisecondsOffset})); <#elseif drillType == "Interval"> final int offset = ${getOffset}; return DateUtilities.fromInterval( - buf.getInt(offset), + buf.getInt(offset), buf.getInt(offset + ${minor.daysOffset}), buf.getInt(offset + ${minor.millisecondsOffset})); <#elseif drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse"> @@ -241,9 +241,9 @@ public class ColumnAccessors { public static class ${drillType}ColumnWriter extends BaseVarWidthWriter { <#else> public static class ${drillType}ColumnWriter extends BaseFixedWidthWriter { - + private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH; - + <#if decimal> private MajorType type; </#if> @@ -272,7 +272,17 @@ public class ColumnAccessors { </#if> @Override + <#if drillType = "VarDecimal"> + public final void setDecimal(final BigDecimal bd) { + byte[] barr = bd.unscaledValue().toByteArray(); + int len = barr.length; + setBytes(barr, len); + } + + public final void setBytes(final byte[] value, int len) { + <#else> public final void set${label}(final ${accessorType} value${putArgs}) { + </#if> <#-- Must compute the write offset first; can't be inline because the writeOffset() function has a side effect of possibly changing the buffer address (bufAddr). --> @@ -355,7 +365,7 @@ import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader; import org.apache.drill.exec.vector.accessor.writer.BaseScalarWriter; public class ColumnAccessorUtils { - + private ColumnAccessorUtils() { } <@build vv.types "Required" "Reader" /> diff --git a/exec/vector/src/main/codegen/templates/ComplexWriters.java b/exec/vector/src/main/codegen/templates/ComplexWriters.java index cfa049d12..6e1d8f3e2 100644 --- a/exec/vector/src/main/codegen/templates/ComplexWriters.java +++ b/exec/vector/src/main/codegen/templates/ComplexWriters.java @@ -98,7 +98,11 @@ public class ${eName}WriterImpl extends AbstractFieldWriter { <#if !(minor.class == "Decimal9" || minor.class == "Decimal18" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense")> public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) { + <#if minor.class == "VarDecimal"> + mutator.addSafe(idx(), <#list fields as field><#if field.name == "scale"><#break></#if>${field.name}<#if field_has_next && fields[field_index+1].name != "scale" >, </#if></#list>); + <#else> mutator.addSafe(idx(), <#list fields as field>${field.name}<#if field_has_next>, </#if></#list>); + </#if> vector.getMutator().setValueCount(idx()+1); } </#if> @@ -123,7 +127,11 @@ public class ${eName}WriterImpl extends AbstractFieldWriter { <#if !(minor.class == "Decimal9" || minor.class == "Decimal18" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense")> public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) { + <#if minor.class == "VarDecimal"> + mutator.setSafe(idx(), <#if mode == "Nullable">1, </#if><#list fields as field><#if field.name == "scale"><#break></#if>${field.name}<#if field_has_next && fields[field_index+1].name != "scale" >, </#if></#list>); + <#else> mutator.setSafe(idx(), <#if mode == "Nullable">1, </#if><#list fields as field>${field.name}<#if field_has_next>, </#if></#list>); + </#if> vector.getMutator().setValueCount(idx()+1); } diff --git a/exec/vector/src/main/codegen/templates/HolderReaderImpl.java b/exec/vector/src/main/codegen/templates/HolderReaderImpl.java index 0eca723ce..e46989ba5 100644 --- a/exec/vector/src/main/codegen/templates/HolderReaderImpl.java +++ b/exec/vector/src/main/codegen/templates/HolderReaderImpl.java @@ -160,6 +160,8 @@ public class ${holderMode}${name}HolderReaderImpl extends AbstractFieldReader { <#if minor.class == "VarBinary"> return value; +<#elseif minor.class == "VarDecimal"> + return org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromDrillBuf(holder.buffer, holder.start, holder.end-holder.start, holder.scale); <#elseif minor.class == "Var16Char"> return new String(value); <#elseif minor.class == "VarChar"> @@ -233,6 +235,8 @@ public class ${holderMode}${name}HolderReaderImpl extends AbstractFieldReader { <#if minor.class == "VarBinary"> return value; +<#elseif minor.class == "VarDecimal"> + return org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromDrillBuf(holder.buffer, holder.start, holder.end-holder.start, holder.scale); <#elseif minor.class == "Var16Char"> return new String(value); <#elseif minor.class == "VarChar"> diff --git a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java index 412f498c2..2b2b6bd9a 100644 --- a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java +++ b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java @@ -405,11 +405,19 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector } <#if (fields?size > 1) && !(minor.class == "Decimal9" || minor.class == "Decimal18" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense")> + <#if minor.class == "VarDecimal"> + public void addSafe(int arrayIndex, <#list fields as field><#if field.name == "scale"><#break></#if>${field.type} ${field.name}<#if field_has_next && fields[field_index+1].name != "scale" >, </#if></#list>) { + int nextOffset = offsets.getAccessor().get(arrayIndex+1); + values.getMutator().setSafe(nextOffset, <#list fields as field><#if field.name == "scale"><#break></#if>${field.name}<#if field_has_next && fields[field_index+1].name != "scale">, </#if></#list>); + offsets.getMutator().setSafe(arrayIndex+1, nextOffset+1); + } + <#else> public void addSafe(int rowIndex, <#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) { final int nextOffset = offsets.getAccessor().get(rowIndex+1); values.getMutator().setSafe(nextOffset, <#list fields as field>${field.name}<#if field_has_next>, </#if></#list>); offsets.getMutator().setSafe(rowIndex+1, nextOffset+1); } + </#if> </#if> <#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse"> diff --git a/exec/vector/src/main/codegen/templates/UnionReader.java b/exec/vector/src/main/codegen/templates/UnionReader.java index 40ad89b82..54276f573 100644 --- a/exec/vector/src/main/codegen/templates/UnionReader.java +++ b/exec/vector/src/main/codegen/templates/UnionReader.java @@ -34,14 +34,14 @@ package org.apache.drill.exec.vector.complex.impl; @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private BaseReader[] readers = new BaseReader[43]; + private BaseReader[] readers = new BaseReader[44]; public UnionVector data; public UnionReader(UnionVector data) { this.data = data; } - private static MajorType[] TYPES = new MajorType[43]; + private static MajorType[] TYPES = new MajorType[44]; static { for (MinorType minorType : MinorType.values()) { diff --git a/exec/vector/src/main/codegen/templates/ValueHolders.java b/exec/vector/src/main/codegen/templates/ValueHolders.java index 9982bd419..d5b4342df 100644 --- a/exec/vector/src/main/codegen/templates/ValueHolders.java +++ b/exec/vector/src/main/codegen/templates/ValueHolders.java @@ -90,9 +90,20 @@ public final class ${className} implements ValueHolder{ public static boolean getSign(int start, DrillBuf buffer) { return ((buffer.getInt(start) & 0x80000000) != 0); } - </#if></#if> - public MajorType getType() {return TYPE;} + public java.math.BigDecimal getBigDecimal() { + java.math.BigDecimal currentValue = org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromSparse(buffer, start, nDecimalDigits, scale); + return currentValue; + } + </#if></#if> + + <#if minor.class.startsWith("VarDecimal")> + public java.math.BigDecimal getBigDecimal() { + //System.out.println("valueHolder start " + start + " end " + " end " + " scale " + scale); + java.math.BigDecimal currentValue = org.apache.drill.exec.util.DecimalUtility.getBigDecimalFromDrillBuf(buffer, start, end-start, scale); + return currentValue; + } + </#if> @Deprecated public int hashCode(){ @@ -116,4 +127,4 @@ public final class ${className} implements ValueHolder{ </#list> </#list> -</#list>
\ No newline at end of file +</#list> diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java index 876d6880d..8ab4c3a48 100644 --- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java +++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java @@ -474,6 +474,15 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V } <#switch minor.class> + <#case "VarDecimal"> + @Override + public ${friendlyType} getObject(int index) { + byte[] b = get(index); + BigInteger bi = b.length == 0 ? new BigInteger("0") : new BigInteger(b); + BigDecimal bd = new BigDecimal(bi, getField().getScale()); + return bd; + } + <#break> <#case "VarChar"> @Override public ${friendlyType} getObject(int index) { diff --git a/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java b/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java index 914d68dee..82809547d 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/util/DecimalUtility.java @@ -152,18 +152,18 @@ public class DecimalUtility extends CoreDecimalUtility{ return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, false); } - public static BigDecimal getBigDecimalFromSparse(DrillBuf data, int startIndex, int nDecimalDigits, int scale) { + public static BigDecimal getBigDecimalFromSparse(DrillBuf data, int startIndex, int nDecimalDigits, int scale) { - // In the sparse representation we pad the scale with zeroes for ease of arithmetic, need to truncate - return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, true); - } + // In the sparse representation we pad the scale with zeroes for ease of arithmetic, need to truncate + return getBigDecimalFromDrillBuf(data, startIndex, nDecimalDigits, scale, true); + } - public static BigDecimal getBigDecimalFromDrillBuf(DrillBuf bytebuf, int start, int length, int scale) { - byte[] value = new byte[length]; - bytebuf.getBytes(start, value, 0, length); - BigInteger unscaledValue = new BigInteger(value); - return new BigDecimal(unscaledValue, scale); - } + public static BigDecimal getBigDecimalFromDrillBuf(DrillBuf bytebuf, int start, int length, int scale) { + byte[] value = new byte[length]; + bytebuf.getBytes(start, value, 0, length); + BigInteger unscaledValue = new BigInteger(value); + return new BigDecimal(unscaledValue, scale); + } public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int start, int length, int scale) { byte[] value = new byte[length]; @@ -355,11 +355,29 @@ public class DecimalUtility extends CoreDecimalUtility{ scale -= MAX_DIGITS; } - // Set the negative sign - if (sign == true) { - data.setInt(startIndex, data.getInt(startIndex) | 0x80000000); + // Set the negative sign + if (sign == true) { + data.setInt(startIndex, data.getInt(startIndex) | 0x80000000); + } + + } + + /** + * Converts from an input BigDecimal into varying width "VarDecimal" representation. + * The object that manages the "data" is assumed to already have the proper scale set, + * matching that of input.scale(). + * @param input input decimal number to be stored + * @param data destination buffer to store the byte array representation of input + * @param startIndex starting index in data to hold the bytes + * @return startIndex + length of bytes stored (i.e., the next startIndex in the data buffer) + */ + public static int getVarDecimalFromBigDecimal(BigDecimal input, ByteBuf data, int startIndex) { + byte[] bytes = input.unscaledValue().toByteArray(); + int len = bytes.length; + data.setBytes(startIndex, bytes); + //System.out.println("getVarDecimal start " + startIndex + " len " + len + " value " + input); + return startIndex + len; } - } public static long getDecimal18FromBigDecimal(BigDecimal input, int scale, int precision) { // Truncate or pad to set the input to the correct scale @@ -433,7 +451,31 @@ public class DecimalUtility extends CoreDecimalUtility{ buffer.setInt(start + (index * 4), value); } - public static int compareSparseBytes(DrillBuf left, int leftStart, boolean leftSign, int leftScale, int leftPrecision, DrillBuf right, int rightStart, boolean rightSign, int rightPrecision, int rightScale, int width, int nDecimalDigits, boolean absCompare) { + /** + * Compares two VarDecimal values, still stored in their respective Drill buffers + * @param left left value Drill buffer + * @param leftStart start offset of left value + * @param leftEnd end offset of left value + * @param leftScale scale of left value + * @param right right value Drill buffer + * @param rightStart start offset of right value + * @param rightEnd end offset of right value + * @param rightScale scale of right value + * @param absCompare comparison of absolute values is done iff this is true + * @return 1 if left > right, 0 if left = right, -1 if left < right. two values that are numerically equal, but with different + * scales (e.g., 2.00 and 2), are considered equal. + */ + public static int compareVarLenBytes(DrillBuf left, int leftStart, int leftEnd, int leftScale, DrillBuf right, int rightStart, int rightEnd, int rightScale, boolean absCompare) { + java.math.BigDecimal bdLeft = getBigDecimalFromDrillBuf(left, leftStart, leftEnd - leftStart, leftScale); + java.math.BigDecimal bdRight = getBigDecimalFromDrillBuf(right, rightStart, rightEnd - rightStart, rightScale); + if (absCompare) { + bdLeft = bdLeft.abs(); + bdRight = bdRight.abs(); + } + return bdLeft.compareTo(bdRight); + } + + public static int compareSparseBytes(DrillBuf left, int leftStart, boolean leftSign, int leftScale, int leftPrecision, DrillBuf right, int rightStart, boolean rightSign, int rightPrecision, int rightScale, int width, int nDecimalDigits, boolean absCompare) { int invert = 1; diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java index b074abd17..1fa785748 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/impl/MapOrListWriterImpl.java @@ -28,6 +28,7 @@ import org.apache.drill.exec.vector.complex.writer.Decimal28SparseWriter; import org.apache.drill.exec.vector.complex.writer.Decimal38DenseWriter; import org.apache.drill.exec.vector.complex.writer.Decimal38SparseWriter; import org.apache.drill.exec.vector.complex.writer.Decimal9Writer; +import org.apache.drill.exec.vector.complex.writer.VarDecimalWriter; import org.apache.drill.exec.vector.complex.writer.Float4Writer; import org.apache.drill.exec.vector.complex.writer.Float8Writer; import org.apache.drill.exec.vector.complex.writer.IntWriter; @@ -223,6 +224,11 @@ public class MapOrListWriterImpl implements MapOrListWriter { } @Override + public VarDecimalWriter varDecimal(String name) { + return (map != null) ? map.varDecimal(name) : list.varDecimal(); + } + + @Override public Decimal38SparseWriter decimal38Sparse(String name) { return (map != null) ? map.decimal38Sparse(name) : list.decimal38Sparse(); } |