aboutsummaryrefslogtreecommitdiff
path: root/exec/vector
diff options
context:
space:
mode:
authorPaul Rogers <progers@cloudera.com>2018-03-10 23:43:36 -0800
committerArina Ielchiieva <arina.yelchiyeva@gmail.com>2018-04-06 12:05:14 +0300
commit4f2182e41f4474ca42ae6d572a9c5d5ff274d984 (patch)
treeb6141e433a5aede597efd197141bfe22b1cfcfa6 /exec/vector
parent127e4150b9495c465f8c37a534dfd50512013765 (diff)
DRILL-6230: Extend row set readers to handle hyper vectors
closes #1161
Diffstat (limited to 'exec/vector')
-rw-r--r--exec/vector/src/main/codegen/templates/ColumnAccessors.java251
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java40
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java83
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java102
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java26
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java65
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java13
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java31
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java52
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java57
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/VectorPrinter.java72
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java184
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java188
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java21
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractScalarReader.java205
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java85
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ArrayReaderImpl.java357
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java187
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java198
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java52
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java49
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReader.java52
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReaders.java193
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java159
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/OffsetVectorReader.java70
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ReaderEvents.java (renamed from exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java)19
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java102
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java5
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessors.java344
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java65
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java4
-rw-r--r--exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java2
32 files changed, 2189 insertions, 1144 deletions
diff --git a/exec/vector/src/main/codegen/templates/ColumnAccessors.java b/exec/vector/src/main/codegen/templates/ColumnAccessors.java
index 14ec1e879..4836099b1 100644
--- a/exec/vector/src/main/codegen/templates/ColumnAccessors.java
+++ b/exec/vector/src/main/codegen/templates/ColumnAccessors.java
@@ -1,3 +1,4 @@
+<#macro copyright>
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -16,9 +17,10 @@
* limitations under the License.
*/
+// This class is generated using Freemarker and the ${.template_name} template.
+</#macro>
<@pp.dropOutputFile />
<@pp.changeOutputFile name="/org/apache/drill/exec/vector/accessor/ColumnAccessors.java" />
-<#include "/@includes/license.ftl" />
<#macro getType drillType label>
@Override
public ValueType valueType() {
@@ -31,74 +33,6 @@
</#if>
}
</#macro>
-<#macro bindReader vectorPrefix drillType isArray >
- <#if drillType = "Decimal9" || drillType == "Decimal18">
- private MajorType type;
- </#if>
- private ${vectorPrefix}${drillType}Vector.Accessor accessor;
-
- @Override
- public void bindVector(ValueVector vector) {
- <#if drillType = "Decimal9" || drillType == "Decimal18">
- type = vector.getField().getType();
- </#if>
- accessor = ((${vectorPrefix}${drillType}Vector) vector).getAccessor();
- }
-
- <#if drillType = "Decimal9" || drillType == "Decimal18">
- @Override
- public void bindVector(MajorType type, VectorAccessor va) {
- super.bindVector(type, va);
- this.type = type;
- }
-
- </#if>
- private ${vectorPrefix}${drillType}Vector.Accessor accessor() {
- if (vectorAccessor == null) {
- return accessor;
- } else {
- return ((${vectorPrefix}${drillType}Vector) vectorAccessor.vector()).getAccessor();
- }
- }
-</#macro>
-<#macro get drillType accessorType label isArray>
- @Override
- public ${accessorType} get${label}(<#if isArray>int index</#if>) {
- <#assign getObject ="getObject"/>
- <#if isArray>
- <#assign indexVar = "index"/>
- <#else>
- <#assign indexVar = ""/>
- </#if>
- <#if drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary">
- return accessor().get(vectorIndex.vectorIndex(${indexVar}));
- <#elseif drillType == "Decimal9" || drillType == "Decimal18">
- return DecimalUtility.getBigDecimalFromPrimitiveTypes(
- accessor().get(vectorIndex.vectorIndex(${indexVar})),
- type.getScale(),
- type.getPrecision());
- <#elseif accessorType == "BigDecimal" || accessorType == "Period">
- return accessor().${getObject}(vectorIndex.vectorIndex(${indexVar}));
- <#elseif drillType == "UInt1">
- return ((int) accessor().get(vectorIndex.vectorIndex(${indexVar}))) & 0xFF;
- <#else>
- return accessor().get(vectorIndex.vectorIndex(${indexVar}));
- </#if>
- }
- <#if drillType == "VarChar">
-
- @Override
- public String getString(<#if isArray>int index</#if>) {
- return new String(getBytes(${indexVar}), Charsets.UTF_8);
- }
- <#elseif drillType == "Var16Char">
-
- @Override
- public String getString(<#if isArray>int index</#if>) {
- return new String(getBytes(${indexVar}), Charsets.UTF_16);
- }
- </#if>
-</#macro>
<#macro build types vectorType accessorType>
<#if vectorType == "Repeated">
<#assign fnPrefix = "Array" />
@@ -126,24 +60,27 @@
</#list>
}
</#macro>
+<@copyright />
package org.apache.drill.exec.vector.accessor;
import java.math.BigDecimal;
import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.vector.DateUtilities;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.vector.*;
import org.apache.drill.exec.util.DecimalUtility;
-import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader;
-import org.apache.drill.exec.vector.accessor.reader.BaseElementReader;
+import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader.BaseVarWidthReader;
+import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader.BaseFixedWidthReader;
import org.apache.drill.exec.vector.accessor.reader.VectorAccessor;
-import org.apache.drill.exec.vector.accessor.writer.BaseScalarWriter;
import org.apache.drill.exec.vector.accessor.writer.AbstractFixedWidthWriter.BaseFixedWidthWriter;
import org.apache.drill.exec.vector.accessor.writer.BaseVarWidthWriter;
import com.google.common.base.Charsets;
+import io.netty.buffer.DrillBuf;
+
import org.joda.time.Period;
/**
@@ -161,8 +98,6 @@ import org.joda.time.Period;
* row.)
*/
-// This class is generated using freemarker and the ${.template_name} template.
-
public class ColumnAccessors {
<#list vv.types as type>
@@ -177,56 +112,141 @@ public class ColumnAccessors {
<#if accessorType=="BigDecimal">
<#assign label="Decimal">
</#if>
- <#if drillType == "VarChar" || drillType == "Var16Char">
+ <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" />
+ <#assign decimal = drillType == "Decimal9" || drillType == "Decimal18" ||
+ drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse" />
+ <#if varWidth>
<#assign accessorType = "byte[]">
<#assign label = "Bytes">
+ <#assign putArgs = ", int len">
+ <#else>
+ <#assign putArgs = "">
+ </#if>
+ <#if javaType == "char">
+ <#assign putType = "short" />
+ <#assign doCast = true />
+ <#else>
+ <#assign putType = javaType />
+ <#assign doCast = (cast == "set") />
</#if>
<#if ! notyet>
//------------------------------------------------------------------------
// ${drillType} readers and writers
- public static class ${drillType}ColumnReader extends BaseScalarReader {
+ <#if varWidth>
+ public static class ${drillType}ColumnReader extends BaseVarWidthReader {
+
+ <#else>
+ public static class ${drillType}ColumnReader extends BaseFixedWidthReader {
+
+ private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH;
- <@bindReader "" drillType false />
+ <#if decimal>
+ private MajorType type;
+
+ </#if>
+ </#if>
+ <#if decimal>
+ @Override
+ public void bindVector(ColumnMetadata schema, VectorAccessor va) {
+ super.bindVector(schema, va);
+ <#if decimal>
+ type = va.type();
+ </#if>
+ }
+ </#if>
<@getType drillType label />
- <@get drillType accessorType label false/>
- }
-
- public static class Nullable${drillType}ColumnReader extends BaseScalarReader {
-
- <@bindReader "Nullable" drillType false />
-
- <@getType drillType label />
+ <#if ! varWidth>
+ @Override public int width() { return VALUE_WIDTH; }
+ </#if>
@Override
- public boolean isNull() {
- return accessor().isNull(vectorIndex.vectorIndex());
+ public ${accessorType} get${label}() {
+ <#assign getObject ="getObject"/>
+ <#assign indexVar = ""/>
+ final DrillBuf buf = bufferAccessor.buffer();
+ <#if ! varWidth>
+ final int readOffset = vectorIndex.offset();
+ <#assign getOffset = "readOffset * VALUE_WIDTH">
+ </#if>
+ <#if varWidth>
+ final long entry = offsetsReader.getEntry();
+ return buf.unsafeGetMemory((int) (entry >> 32), (int) (entry & 0xFFFF_FFFF));
+ <#elseif drillType == "Decimal9">
+ return DecimalUtility.getBigDecimalFromPrimitiveTypes(
+ buf.getInt(${getOffset}),
+ type.getScale(),
+ type.getPrecision());
+ <#elseif drillType == "Decimal18">
+ return DecimalUtility.getBigDecimalFromPrimitiveTypes(
+ buf.getLong(${getOffset}),
+ type.getScale(),
+ type.getPrecision());
+ <#elseif drillType == "IntervalYear">
+ return DateUtilities.fromIntervalYear(
+ buf.getInt(${getOffset}));
+ <#elseif drillType == "IntervalDay">
+ final int offset = ${getOffset};
+ return DateUtilities.fromIntervalDay(
+ buf.getInt(offset),
+ buf.getInt(offset + ${minor.millisecondsOffset}));
+ <#elseif drillType == "Interval">
+ final int offset = ${getOffset};
+ return DateUtilities.fromInterval(
+ buf.getInt(offset),
+ buf.getInt(offset + ${minor.daysOffset}),
+ buf.getInt(offset + ${minor.millisecondsOffset}));
+ <#elseif drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
+ return DecimalUtility.getBigDecimalFromSparse(buf, ${getOffset},
+ ${minor.nDecimalDigits}, type.getScale());
+ <#elseif drillType == "Decimal28Dense" || drillType == "Decimal38Dense">
+ return DecimalUtility.getBigDecimalFromDense(buf, ${getOffset},
+ ${minor.nDecimalDigits}, type.getScale(),
+ ${minor.maxPrecisionDigits}, VALUE_WIDTH);
+ <#elseif drillType == "UInt1">
+ return buf.getByte(${getOffset}) & 0xFF;
+ <#elseif drillType == "UInt2">
+ return buf.getShort(${getOffset}) & 0xFFFF;
+ <#elseif drillType == "UInt4">
+ // Should be the following:
+ // return ((long) buf.unsafeGetInt(${getOffset})) & 0xFFFF_FFFF;
+ // else, the unsigned values of 32 bits are mapped to negative.
+ return buf.getInt(${getOffset});
+ <#elseif drillType == "Float4">
+ return Float.intBitsToFloat(buf.getInt(${getOffset}));
+ <#elseif drillType == "Float8">
+ return Double.longBitsToDouble(buf.getLong(${getOffset}));
+ <#else>
+ return buf.get${putType?cap_first}(${getOffset});
+ </#if>
}
+ <#if drillType == "VarChar">
- <@get drillType accessorType label false />
- }
-
- public static class Repeated${drillType}ColumnReader extends BaseElementReader {
-
- <@bindReader "" drillType true />
-
- <@getType drillType label />
+ @Override
+ public String getString() {
+ return new String(getBytes(${indexVar}), Charsets.UTF_8);
+ }
+ <#elseif drillType == "Var16Char">
- <@get drillType accessorType label true />
+ @Override
+ public String getString() {
+ return new String(getBytes(${indexVar}), Charsets.UTF_16);
+ }
+ </#if>
}
- <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" />
<#if varWidth>
public static class ${drillType}ColumnWriter extends BaseVarWidthWriter {
<#else>
public static class ${drillType}ColumnWriter extends BaseFixedWidthWriter {
- <#if drillType = "Decimal9" || drillType == "Decimal18" ||
- drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
+
+ private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH;
+
+ <#if decimal>
private MajorType type;
</#if>
- private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH;
</#if>
private final ${drillType}Vector vector;
@@ -234,8 +254,7 @@ public class ColumnAccessors {
<#if varWidth>
super(((${drillType}Vector) vector).getOffsetVector());
<#else>
- <#if drillType = "Decimal9" || drillType == "Decimal18" ||
- drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
+ <#if decimal>
type = vector.getField().getType();
</#if>
</#if>
@@ -300,12 +319,12 @@ public class ColumnAccessors {
<#elseif drillType == "IntervalDay">
final int offset = ${putAddr};
drillBuf.setInt(offset, value.getDays());
- drillBuf.setInt(offset + 4, periodToMillis(value));
+ drillBuf.setInt(offset + 4, DateUtilities.periodToMillis(value));
<#elseif drillType == "Interval">
final int offset = ${putAddr};
drillBuf.setInt(offset, value.getYears() * 12 + value.getMonths());
drillBuf.setInt(offset + 4, value.getDays());
- drillBuf.setInt(offset + 8, periodToMillis(value));
+ drillBuf.setInt(offset + 8, DateUtilities.periodToMillis(value));
<#elseif drillType == "Float4">
drillBuf.setInt(${putAddr}, Float.floatToRawIntBits((float) value));
<#elseif drillType == "Float8">
@@ -335,18 +354,22 @@ public class ColumnAccessors {
</#if>
</#list>
</#list>
- public static int periodToMillis(Period value) {
- return ((value.getHours() * 60 +
- value.getMinutes()) * 60 +
- value.getSeconds()) * 1000 +
- value.getMillis();
- }
+}
+<@pp.changeOutputFile name="/org/apache/drill/exec/vector/accessor/ColumnAccessorUtils.java" />
+<@copyright />
-<@build vv.types "Required" "Reader" />
+package org.apache.drill.exec.vector.accessor;
-<@build vv.types "Nullable" "Reader" />
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors.*;
+import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader;
+import org.apache.drill.exec.vector.accessor.writer.BaseScalarWriter;
-<@build vv.types "Repeated" "Reader" />
+public class ColumnAccessorUtils {
+
+ private ColumnAccessorUtils() { }
+
+<@build vv.types "Required" "Reader" />
<@build vv.types "Required" "Writer" />
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java
index 8f33f0ecf..0679c3b53 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java
@@ -32,7 +32,7 @@ package org.apache.drill.exec.vector.accessor;
* {@see ArrayWriter}
*/
-public interface ArrayReader {
+public interface ArrayReader extends ColumnReader {
/**
* Number of elements in the array.
@@ -50,26 +50,6 @@ public interface ArrayReader {
ObjectType entryType();
/**
- * Return a reader for the elements of a scalar array.
- * @return reader for scalar elements
- */
-
- ScalarElementReader elements();
-
- /**
- * Return a generic object reader for the array entry. Not available
- * for scalar elements. Positions the reader to read the selected
- * element.
- *
- * @param index array index
- * @return generic object reader
- */
-
- ObjectReader entry(int index);
- TupleReader tuple(int index);
- ArrayReader array(int index);
-
- /**
* Return the generic object reader for the array element. This
* version <i>does not</i> position the reader, the client must
* call {@link setPosn()} to set the position. This form allows
@@ -77,6 +57,7 @@ public interface ArrayReader {
*/
ObjectReader entry();
+ ScalarReader scalar();
TupleReader tuple();
ArrayReader array();
@@ -88,19 +69,14 @@ public interface ArrayReader {
void setPosn(int index);
- /**
- * Return the entire array as an <tt>List</tt> of objects.
- * Note, even if the array is scalar, the elements are still returned
- * as a list. This method is primarily for testing.
- * @return array as a <tt>List</tt> of objects
- */
-
- Object getObject();
+ void rewind();
/**
- * Return the entire array as a string. Primarily for debugging.
- * @return string representation of the array
+ * Move forward one position.
+ *
+ * @return true if another position is available, false if
+ * the end of the array is reached
*/
- String getAsString();
+ boolean next();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java
new file mode 100644
index 000000000..15e5c742c
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+
+/**
+ * Base interface for all column readers, defining a generic set of methods
+ * that all readers provide. In particular, given the metadata and the object
+ * type, one can determine what to do with each reader when working with readers
+ * generically. The <tt>getObject()</tt> and <tt>getAsString()</tt> methods provide
+ * generic data access for tests and debugging.
+ */
+
+public interface ColumnReader {
+
+ ColumnMetadata schema();
+
+ /**
+ * The type of this reader.
+ *
+ * @return type of reader
+ */
+
+ ObjectType type();
+
+ /**
+ * Determine if this value is null.
+ * <ul>
+ * <li>Nullable scalar: determine if the value is null.</li>
+ * <li>Non-nullable scalar: always returns <tt>false</tt>.</li>
+ * <li>Arrays: always returns <tt>false</tt>.</li>
+ * <li>Lists: determine if the list for the current row is null.
+ * In a list, an array entry can be null, empty, or can contain
+ * items. In repeated types, the array itself is never null.
+ * If the array is null, then it implicitly has no entries.</li>
+ * <li>Map or Repeated Map: Always returns <tt>false</tt>.</li>
+ * <li>Map inside a union, or in a list that contains a union,
+ * the tuple itself can be null.</li>
+ * <li>Union: Determine if the current value is null. Null values have no type
+ * and no associated reader.</li>
+ * </ul>
+ *
+ * @return <tt>true</tt> if this value is null; <tt>false</tt> otherwise
+ */
+
+ boolean isNull();
+
+ /**
+ * Return the value of the underlying data as a Java object.
+ * Primarily for testing.
+ * <ul>
+ * <li>Array: Return the entire array as a <tt>List</tt> of objects.
+ * Note, even if the array is scalar, the elements are still returned
+ * as a list.</li>
+ * </ul>
+ * @return the value as a Java object
+ */
+
+ Object getObject();
+
+ /**
+ * Return the entire object as a string. Primarily for debugging.
+ * @return string representation of the object
+ */
+
+ String getAsString();
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java
index b40b70560..edc362377 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java
@@ -18,11 +18,105 @@
package org.apache.drill.exec.vector.accessor;
/**
- * Index into a vector batch, or an array, at read time.
- * Supports direct, indirect and hyper-batches.
+ * The reader structure is heavily recursive. The top-level reader
+ * iterates over batches, possibly through an indirection vector
+ * (SV2 or SV4.) The row is a tuple of top-level vectors. Each top-level
+ * vector may be an array. Iteration through the array works identically
+ * to iteration over the batch as a whole. (In fact, the scalar readers
+ * don't know if they are top-level or array readers.) Array nesting
+ * can continue to any level of depth.
+ * <p>
+ * Further, when used with a logical join, the top-level iteration
+ * may be over an array, with an implicit join out to enclosing nesting
+ * levels.
+ * <p>
+ * Because of this, the same index interface must work at all nesting
+ * levels: at the top, and within arrays. This interface
+ * supports a usage model as follows:<pre><code>
+ * ColumnReaderIndex index = ...
+ * while (index.hasNext()) {
+ * index.next();
+ * int hyperIndex = index.hyperVectorIndex();
+ * int vectorOffset = index.offset();
+ * }</code></pre>
+ * <p>
+ * When convenient, the following abbreviated form is also
+ * supported:<pre><code>
+ * ColumnReaderIndex index = ...
+ * while (index.next()) {
+ * int hyperIndex = index.hyperVectorIndex();
+ * int vectorOffset = index.offset();
+ * }</code></pre>
+ * <p>
+ * For a top-level index, the check of <tt>hasNext()</tt> and
+ * call to <tt>next()</tt> is done by the row set reader. For
+ * arrays, the call to <tt>hasNext()</tt> is done by the array
+ * reader. The call to <tt>next()</tt> is done by the scalar
+ * reader (for scalar arrays) or the array reader (for other
+ * arrays.)
+ * <p>
+ * The hyper-vector index has meaning only for top-level vectors,
+ * and is ignored by nested vectors. (Nested vectors work by navigating
+ * down from a top-level vector.) But, as noted above, any given
+ * reader does not know if it is at the top or nested level, instead
+ * it is the {@link VectorAccessor} abstraction that works out the
+ * nesting levels.
*/
public interface ColumnReaderIndex {
- int batchIndex();
- int vectorIndex();
+
+ /**
+ * Ordinal index within the batch or array. Increments from -1.
+ * (The position before the first item.)
+ * Identifies the logical row number of top-level records,
+ * or the array element for arrays. Actual physical
+ * index may be different if an indirection layer is in use.
+ *
+ * @return logical read index
+ */
+
+ int logicalIndex();
+
+ /**
+ * When used with a hyper-vector (SV4) based batch, returns the
+ * index of the current batch within the hyper-batch. If this is
+ * a single batch, or a nested index, then always returns 0.
+ *
+ * @return batch index of the current row within the
+ * hyper-batch
+ */
+
+ int hyperVectorIndex();
+
+ /**
+ * Vector offset to read. For top-level vectors, the offset may be
+ * through an indirection (SV2 or SV4). For arrays, the offset is the
+ * absolute position, within the vector, of the current array element.
+ *
+ * @return vector read index
+ */
+
+ int offset();
+
+ /**
+ * Advances the index to the next position. Used:
+ * <ul>
+ * <li>At the top level for normal readers or</li>
+ * <li>At a nested level for implicit join readers, and</li>
+ * <li>At each array level to iterate over arrays.</li>
+ * </ul>
+ *
+ * @return true if another value is available, false if EOF
+ */
+
+ boolean next();
+
+ /**
+ * Return the number of items that this index indexes: top-level record
+ * count for the root index; total element count for nested arrays.
+ *
+ * @return element count at this index level
+ */
+
+ int size();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java
index 9c53e5813..e3527c590 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java
@@ -29,32 +29,8 @@ package org.apache.drill.exec.vector.accessor;
* {@see ObjectWriter>
*/
-public interface ObjectReader {
-
- /**
- * The type of this reader.
- *
- * @return type of reader
- */
-
- ObjectType type();
+public interface ObjectReader extends ColumnReader {
ScalarReader scalar();
- ScalarElementReader elements();
TupleReader tuple();
ArrayReader array();
-
- /**
- * Return the value of the underlying data as a Java object.
- * Primarily for testing
- * @return Java object that represents the underlying value
- */
-
- Object getObject();
-
- /**
- * Return the entire object as a string. Primarily for debugging.
- * @return string representation of the object
- */
-
- String getAsString();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java
deleted file mode 100644
index d1f31a82f..000000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor;
-
-import java.math.BigDecimal;
-
-import org.joda.time.Period;
-
-/**
- * Interface to access the values of an array column. In general, each
- * vector implements just one of the get methods. Check the vector type
- * to know which method to use. Though, generally, when writing test
- * code, the type is known to the test writer.
- * <p>
- * Arrays allow random access to the values within the array. The index
- * passed to each method is the index into the array for the current
- * row and column. (This means that arrays are three dimensional:
- * the usual (row, column) dimensions plus an array index dimension:
- * (row, column, array index).
- * <p>
- * Note that the <tt>isNull()</tt> method is provided for completeness,
- * but no Drill array allows null values at present.
- * <p>
- * {@see ScalarWriter}
- */
-
-public interface ScalarElementReader {
- /**
- * Describe the type of the value. This is a compression of the
- * value vector type: it describes which method will return the
- * vector value.
- * @return the value type which indicates which get method
- * is valid for the column
- */
-
- ValueType valueType();
- int size();
-
- boolean isNull(int index);
- int getInt(int index);
- long getLong(int index);
- double getDouble(int index);
- String getString(int index);
- byte[] getBytes(int index);
- BigDecimal getDecimal(int index);
- Period getPeriod(int index);
-
- Object getObject(int index);
- String getAsString(int index);
-}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java
index e1c26bf29..5b09039eb 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java
@@ -44,7 +44,7 @@ import org.joda.time.Period;
* {@see ScalarWriter}
*/
-public interface ScalarReader {
+public interface ScalarReader extends ColumnReader {
/**
* Describe the type of the value. This is a compression of the
* value vector type: it describes which method will return the
@@ -54,14 +54,6 @@ public interface ScalarReader {
*/
ValueType valueType();
-
- /**
- * Report if the column is null. Non-nullable columns always
- * return <tt>false</tt>.
- * @return true if the column value is null, false if the
- * value is set
- */
- boolean isNull();
int getInt();
long getLong();
double getDouble();
@@ -69,7 +61,4 @@ public interface ScalarReader {
byte[] getBytes();
BigDecimal getDecimal();
Period getPeriod();
-
- Object getObject();
- String getAsString();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java
index 8d691c3a2..c33f57994 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java
@@ -25,15 +25,33 @@ import org.apache.drill.exec.record.metadata.TupleMetadata;
* by name or column index (as defined in the tuple schema.)
* Also provides two generic methods to get the value as a
* Java object or as a string.
- * <p>
- * {@see TupleWriter}
+ *
+ * @see TupleWriter
*/
-public interface TupleReader {
- TupleMetadata schema();
+public interface TupleReader extends ColumnReader {
+ TupleMetadata tupleSchema();
int columnCount();
+ /**
+ * Return a column reader by column index as reported by the
+ * associated metadata.
+ *
+ * @param colIndex column index
+ * @return reader for the column
+ * @throws IndexOutOfBoundsException if the index is invalid
+ */
+
ObjectReader column(int colIndex);
+
+ /**
+ * Return a column reader by name.
+ *
+ * @param colName column name
+ * @return reader for the column, or <tt>null</tt> if no such
+ * column exists
+ */
+
ObjectReader column(String colName);
// Convenience methods
@@ -46,9 +64,4 @@ public interface TupleReader {
TupleReader tuple(String colName);
ArrayReader array(int colIndex);
ArrayReader array(String colName);
- ScalarElementReader elements(int colIndex);
- ScalarElementReader elements(String colName);
-
- Object getObject();
- String getAsString();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
new file mode 100644
index 000000000..dee2612f0
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/UnsupportedConversionError.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+
+/**
+ * Raised when a column accessor reads or writes the value using the wrong
+ * Java type (which may indicate a data inconsistency in the input data.)
+ * Instances are created only through the static factory methods, each of
+ * which formats a message that names the offending column.
+ */
+
+public class UnsupportedConversionError extends UnsupportedOperationException {
+
+  private static final long serialVersionUID = 1L;
+
+  private UnsupportedConversionError(String message) {
+    super(message);
+  }
+
+  /**
+   * Create the error for a read that asked for a Java type the column
+   * cannot be converted to.
+   *
+   * @param schema metadata of the column being read
+   * @param javaType name of the requested Java type
+   * @return the error, ready to be thrown by the caller
+   */
+  public static UnsupportedConversionError readError(ColumnMetadata schema, String javaType) {
+    return new UnsupportedConversionError(
+        String.format("Column `%s`: Unsupported conversion from Drill type %s to Java type %s",
+            schema.name(), schema.type().name(), javaType));
+  }
+
+  /**
+   * Create the error for a write that supplied a Java type the column
+   * cannot accept.
+   *
+   * @param schema metadata of the column being written
+   * @param javaType name of the supplied Java type
+   * @return the error, ready to be thrown by the caller
+   */
+  public static UnsupportedConversionError writeError(ColumnMetadata schema, String javaType) {
+    // Argument order must follow the format string: column name, then the
+    // Java (source) type, then the Drill (target) type.
+    return new UnsupportedConversionError(
+        String.format("Column `%s`: Unsupported conversion from Java type %s to Drill type %s",
+            schema.name(), javaType, schema.type().name()));
+  }
+
+  /**
+   * Create the error for an attempt to store a null in a column that is
+   * not nullable. The message reports the column's mode and type.
+   *
+   * @param schema metadata of the column being written
+   * @return the error, ready to be thrown by the caller
+   */
+  public static UnsupportedConversionError nullError(ColumnMetadata schema) {
+    return new UnsupportedConversionError(
+        String.format("Column `%s`: Type %s %s is not nullable",
+            schema.name(), schema.mode().name(), schema.type().name()));
+  }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java
index e6687dcd3..5059977ab 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java
@@ -27,5 +27,60 @@ package org.apache.drill.exec.vector.accessor;
*/
public enum ValueType {
- INTEGER, LONG, DOUBLE, STRING, BYTES, DECIMAL, PERIOD
+
+ /**
+ * The value is set from an integer: TINYINT,
+ * SMALLINT, INT, UINT1, and UINT2.
+ */
+
+ INTEGER,
+
+ /**
+ * The value is set from a long: BIGINT and
+ * UINT4.
+ */
+
+ LONG,
+
+ /**
+ * Type is set from a double: FLOAT4 and FLOAT8.
+ */
+ DOUBLE,
+
+ /**
+ * The value can be set from a string (for convenience).
+ * VARCHAR and VAR16CHAR.
+ */
+
+ STRING,
+
+ /**
+ * The value is set from a byte buffer. VARCHAR (in production
+ * code), VAR16CHAR, VARBINARY.
+ */
+
+ BYTES,
+
+ /**
+ * The value is set from a BigDecimal: any of Drill's decimal
+ * types.
+ */
+
+ DECIMAL,
+
+ /**
+ * The value is set from a Period. Any of Drill's date/time
+ * types. (Note: there is a known bug in which Drill incorrectly
+ * differentiates between local date/times (those without a timezone)
+ * and absolute date/times (those with a timezone.) Caveat emptor.)
+ */
+
+ PERIOD,
+
+ /**
+ * The value has no type. This is typically a dummy writer used
+ * for unprojected columns.
+ */
+
+ NULL
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/VectorPrinter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/VectorPrinter.java
new file mode 100644
index 000000000..45847bcb5
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/VectorPrinter.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.impl;
+
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+
+import com.google.common.base.Charsets;
+
+/**
+ * Debugging aid that dumps the contents of offset vectors and
+ * VarChar vectors to standard output.
+ */
+
+public class VectorPrinter {
+
+  /**
+   * Print a run of entries from an offset vector on a single line,
+   * followed by the address reported by the vector's buffer.
+   *
+   * @param vector offset vector to display
+   * @param start index of the first entry to print
+   * @param length number of entries to print
+   */
+  public static void printOffsets(UInt4Vector vector, int start, int length) {
+    header(vector, start, length);
+    for (int offset = 0; offset < length; offset++) {
+      // Entries are separated, not terminated, by a space.
+      if (offset != 0) {
+        System.out.print(" ");
+      }
+      System.out.print(vector.getAccessor().get(start + offset));
+    }
+    System.out.print("], addr = ");
+    System.out.println(vector.getBuffer().addr());
+  }
+
+  /**
+   * Print a run of values from a VarChar vector: first the matching
+   * offsets (one more offset than values), then each value on its own
+   * line, quoted and labeled with its index.
+   *
+   * @param vector VarChar vector to display
+   * @param start index of the first value to print
+   * @param length number of values to print
+   */
+  public static void printStrings(VarCharVector vector, int start, int length) {
+    printOffsets(vector.getOffsetVector(), start, length + 1);
+    header(vector, start, length);
+    System.out.println();
+    for (int offset = 0; offset < length; offset++) {
+      final int posn = start + offset;
+      System.out.print(" " + posn + ": \"");
+      System.out.print(stringAt(vector, posn));
+      System.out.println("\"");
+    }
+    System.out.println("]");
+  }
+
+  /**
+   * Print the line prefix shared by the dump methods: the vector class
+   * and the inclusive index range, ending with an opening bracket.
+   *
+   * @param vector vector being displayed
+   * @param start index of the first entry
+   * @param length number of entries
+   */
+  public static void header(ValueVector vector, int start, int length) {
+    final int last = start + length - 1;
+    System.out.print(vector.getClass() + ": (" + start + " - " + last + "): [");
+  }
+
+  /**
+   * Fetch one VarChar value and decode its bytes as UTF-8.
+   *
+   * @param vector VarChar vector to read
+   * @param i index of the value
+   * @return the decoded string
+   */
+  public static String stringAt(VarCharVector vector, int i) {
+    final byte[] utf8 = vector.getAccessor().get(i);
+    return new String(utf8, Charsets.UTF_8);
+  }
+
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java
index c90a7342e..990ab1312 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java
@@ -161,6 +161,190 @@
* the same interface supported the original mutator-based implementation and
* the revised Netty-based implementation. The benefit, however, is stark;
* the direct-to-Netty version is up to 4x faster (for repeated types).
+ *
+ * <h4>Tuple Model</h4>
+ *
+ * Drill has the idea of a row and of a map. (A Drill map is much like a "struct":
+ * every instance of the "map" must have the same columns.) Both are instances
+ * of the relational concept of a "tuple." In relational theory, a tuple is
+ * a collection of values in which each value has a name and a position. The
+ * name is for the user, the position (index) allows efficient implementation.
+ * <p>
+ * Drill is unusual among query and DB engines in that it does not normally
+ * use indexes. The reason is easy to understand. Suppose two files contain
+ * columns a and b. File 1, read by minor fragment 0, contains the columns in
+ * the order (a, b). But, file 2, read by minor fragment 1, contains the columns
+ * in the order (b, a). Drill considers this the same schema. Since column
+ * order can vary, Drill has avoided depending on column order. (But, only
+ * partly; many bugs have cropped up because some parts of the code do
+ * require common ordering.)
+ * <p>
+ * Here we observe that column order varies only across fragments. We have
+ * control of the column order within our own fragment. (We can coerce varying
+ * order into a desired order. If the above two files are read by the same
+ * scan operator, then the first file sets the order at (a, b), and the second
+ * file's (b, a) order can be coerced into the (a, b) order.)
+ * <p>
+ * Given this insight, the readers and writers here promote position to a
+ * first-class concept. Code can access columns by name (for convenience,
+ * especially in testing) or by position (for efficiency.)
+ * <p>
+ * Further, it is often convenient to fetch a column accessor (reader or
+ * writer) once, then cache it. The design here ensures that such caching works
+ * well. The goal is that, eventually, operators will code-generate references
+ * to cached readers and writers instead of generating code that works directly
+ * with the vectors.
+ *
+ * <h4>Lists and Unions</h4>
+ *
+ * Drill provides a List and a Union type. These types are incomplete, buggy
+ * and ill-supported by Drill's operators. However, they are key to Drill's
+ * JSON-powered, schema-free marketing message. Thus, we must support them
+ * in the reader/writer level even if they are broken and under-used elsewhere
+ * in Drill. (If we did not support them, then the JSON readers could not use
+ * the new model, and we'd have to support both the old and new versions, which
+ * would create a bigger mess than we started with.)
+ * <p>
+ * Drill's other types have a more-or-less simple mapping to the relational
+ * model, allowing simple reader and writer interfaces. But, the Union and List
+ * types are not a good fit and cause a very large amount of complexity in the
+ * reader and writer models.
+ * <p>
+ * A Union is just that: it is a container for a variety of typed vectors. It
+ * is like a "union" in C: it has members for each type, but only one type is
+ * in use at any one time. However, unlike C, the implementation is more like
+ * a C "struct": every vector takes space for every row, even if no value is stored
+ * in that row. That is, a Drill union is as if a naive C programmer used a
+ * "struct" when s/he should have used a union.
+ * <p>
+ * Unions are designed to evolve dynamically as data is read. Suppose we read
+ * the following JSON:<pre><code>
+ * {a: 10} {a: "foo"} {a: null} {a: 12.34}
+ * </code></pre>
+ * Here, we discover the need for an Int type, then a Varchar, then mark a
+ * value as null and finally a Float. The union adds the desired types as we
+ * request them. The writer mimics this behavior, using a listener to do the
+ * needed vector work.
+ * <p>
+ * Further, a union can be null. It carries a types vector that indicates the
+ * type of each row. A zero-value indicates that the union as a whole is null.
+ * In this case, null means no value: it is not, say, a null Int or null
+ * Varchar: it is simply null (as in JSON). Since at most one vector within the union
+ * carries a value, the element vectors must also be nullable. This means
+ * that a union has two null bits: one for the union, the other for the
+ * selected type. It is not clear what Drill semantics are supposed to be. Here
+ * the writers assume that either the whole union is null, or that exactly one
+ * member is non-null. Readers are more paranoid: they assume each member is null
+ * if either the union is null or the member itself is null. (Yes, a bit of a
+ * mess...)
+ * <p>
+ * The current union vector format is highly inefficient.
+ * If the union concept is needed, then it should
+ * be redesigned, perhaps as a variable-width vector in which each entry
+ * consists of a type/value pair. (For variable-width values such as
+ * strings, the implementation would be a triple of type, length and
+ * value.) The API here is designed to abstract away the implementation
+ * and should work equally well for the current "union" implementation and
+ * the possible "variant" implementation. As a result, when changing the
+ * API, avoid introducing methods that assume an implementation.
+ * <p>
+ * Lists add another layer of complexity. A list is, logically, a repeated
+ * union. But, for whatever historical reasons, a List can be other things
+ * as well. First, it can have no type at all: a list of nothing. This likely
+ * has no meaning, but the semantics of the List vector allow it. Second, the
+ * List can be an array of a single type in which each entry can be null.
+ * (Normal Repeated types can have an empty array for a row, but cannot have
+ * a null entry. Lists can have either an empty array or a null array in
+ * order to model the JSON <tt>null</tt> and <tt>[]</tt> cases.)
+ * <p>
+ * When a List has a single type, it stores the backing vector directly within
+ * the List. But, a list can also be a list of unions. In this case, the List
+ * stores a union vector as its backing vector. Here, we now have three ways
+ * to indicate null: the List's bits vector, the type vector in the union, and
+ * the bits vector in each element vector. Again, the writer assumes that
+ * if the List vector is null, the entire value for that row is null. The reader
+ * is again paranoid and handles all three nullable states. (Again, a huge
+ * mess.)
+ * <p>
+ * The readers can provide a nice API for these cases since we know the List
+ * format up front. They can present the list as either a nullable array of
+ * a single type, or as an array of unions.
+ * <p>
+ * Writers have more of a challenge. If we knew that a List was being used as
+ * a list of, say, Nullable Int, we could present the List as an array writer
+ * with Int elements. But, the List allows dynamic type addition, as with unions.
+ * (In the case of the list, it has internal special handling for the single vs.
+ * many type case.)
+ * <p>
+ * To isolate the client from the list representation, it is simpler to always
+ * present a List as an array of variants. But, this is awkward in the single-type
+ * case. The solution is to use metadata. If the metadata promises to use only
+ * a single type, the writer can use the nullable array of X format. If the
+ * metadata says to use a union (the default), then the List is presented as
+ * an array of unions, even when the list has 0 or 1 member types. (The
+ * complexity here is excessive: Drill should really redesign this feature to make
+ * it simpler and to better fit the relational model.)
+ *
+ * <h4>Vector Evolution</h4>
+ *
+ * Drill uses value vector classes created during the rush to ship Drill 1.0.
+ * They are not optimal: the key value is that the vectors work.
+ * <p>
+ * The Apache Arrow project created a refined version of the vector classes.
+ * Much talk has occurred about ripping out Drill's implementation to use
+ * Arrow instead.
+ * <p>
+ * However, even Arrow has limits:
+ * <ul>
+ * <li>Like Drill, it uses twice the number of positions in the offset vector
+ * as for the values vector. (Drill allocates power-of-two sizes. The offset
+ * vector has one more entry than values. With a power-of-two number of values,
+ * offsets are rounded to the next power of two.)</li>
+ * <li>Like Drill before this work, Arrow does not manage vector sizes; it
+ * allows vectors to grow without bound, causing the memory problems that this
+ * project seeks to resolve.</li>
+ * <li>Like Drill, Arrow implements unions as a space-inefficient collection
+ * of vectors format.</li>
+ * </ul>
+ * If we learn from the above, we might want to create a Value Vectors 2.0
+ * based on the following concepts:
+ * <ul>
+ * <li>Store vector values as a chain of fixed-size buffers. This avoids
+ * memory fragmentation, makes memory allocation much more efficient, is
+ * easier on the client, and avoids internal fragmentation.</li>
+ * <li>Store offsets as the end value, not the start value. This eliminates
+ * the extra offset position, simplifies indexing, and can save on internal
+ * memory fragmentation.</li>
+ * <li>Store unions using "variant encoding" as described above.</li>
+ * </ul>
+ * Such changes would be a huge project if every operator continued to work
+ * directly with vectors and memory buffers. In fact, the cost would be so
+ * high that these improvements might never be done.
+ * <p>
+ * Therefore, a goal of this reader/writer layer is to isolate the operators
+ * from vector implementation. For this to work, the accessors must be at least
+ * as efficient as direct vector access. (They are now more efficient.)
+ * <p>
+ * Once all operators use this layer, a switch to Arrow, or an evolution toward
+ * Value Vectors 2.0 will be much easier. Simply change the vector format and
+ * update the reader and writer implementations. The rest of the code will
+ * remain unchanged. (Note, to achieve this goal, it is important to carefully
+ * design the accessor API [interfaces] to hide implementation details.)
+ *
+ * <h4>Simpler Reader API</h4>
+ *
+ * A key value of Drill is the ability for users to add custom record readers.
+ * But, at present, doing so is very complex because the developer must know
+ * quite a bit about Drill internals. At this level, they must know how to
+ * allocate vectors, how to write to each kind of vector, how to keep track
+ * of array sizes, how to set the vector and batch row counts, and more. In
+ * general, there is only one right way to do this work. (Though some readers
+ * use the old-style vector writers, others work with direct memory instead
+ * of with vectors, and so on.)
+ * <p>
+ * This layer handles all that work, providing a simple API that encourages
+ * more custom readers because the work to create the readers becomes far
+ * simpler. (Other layers tackle other parts of the problem as well.)
*/
package org.apache.drill.exec.vector.accessor;
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java
deleted file mode 100644
index 7fb0c9dc1..000000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.reader;
-
-import org.apache.drill.exec.vector.UInt4Vector.Accessor;
-import org.apache.drill.exec.vector.accessor.ArrayReader;
-import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
-import org.apache.drill.exec.vector.accessor.ObjectReader;
-import org.apache.drill.exec.vector.accessor.ObjectType;
-import org.apache.drill.exec.vector.accessor.ScalarElementReader;
-import org.apache.drill.exec.vector.accessor.TupleReader;
-import org.apache.drill.exec.vector.complex.RepeatedValueVector;
-
-/**
- * Reader for an array-valued column. This reader provides access to specific
- * array members via an array index. This is an abstract base class;
- * subclasses are generated for each repeated value vector type.
- */
-
-public abstract class AbstractArrayReader implements ArrayReader {
-
- /**
- * Object representation of an array reader.
- */
-
- public static class ArrayObjectReader extends AbstractObjectReader {
-
- private AbstractArrayReader arrayReader;
-
- public ArrayObjectReader(AbstractArrayReader arrayReader) {
- this.arrayReader = arrayReader;
- }
-
- @Override
- public void bindIndex(ColumnReaderIndex index) {
- arrayReader.bindIndex(index);
- }
-
- @Override
- public ObjectType type() {
- return ObjectType.ARRAY;
- }
-
- @Override
- public ArrayReader array() {
- return arrayReader;
- }
-
- @Override
- public ScalarElementReader elements() {
- return arrayReader.elements();
- }
-
- @Override
- public Object getObject() {
- return arrayReader.getObject();
- }
-
- @Override
- public String getAsString() {
- return arrayReader.getAsString();
- }
-
- @Override
- public void reposition() {
- arrayReader.reposition();
- }
- }
-
- public static class BaseElementIndex {
- private final ColumnReaderIndex base;
- protected int startOffset;
- protected int length;
-
- public BaseElementIndex(ColumnReaderIndex base) {
- this.base = base;
- }
-
- public int batchIndex() {
- return base.batchIndex();
- }
-
- public void reset(int startOffset, int length) {
- assert length >= 0;
- assert startOffset >= 0;
- this.startOffset = startOffset;
- this.length = length;
- }
-
- public int size() { return length; }
-
- public int elementIndex(int index) {
- if (index < 0 || length <= index) {
- throw new IndexOutOfBoundsException("Index = " + index + ", length = " + length);
- }
- return startOffset + index;
- }
- }
-
- private final Accessor accessor;
- private final VectorAccessor vectorAccessor;
- protected ColumnReaderIndex baseIndex;
- protected BaseElementIndex elementIndex;
-
- public AbstractArrayReader(RepeatedValueVector vector) {
- accessor = vector.getOffsetVector().getAccessor();
- vectorAccessor = null;
- }
-
- public AbstractArrayReader(VectorAccessor vectorAccessor) {
- accessor = null;
- this.vectorAccessor = vectorAccessor;
- }
-
- public void bindIndex(ColumnReaderIndex index) {
- baseIndex = index;
- if (vectorAccessor != null) {
- vectorAccessor.bind(index);
- }
- }
-
- private Accessor accessor() {
- if (accessor != null) {
- return accessor;
- }
- return ((RepeatedValueVector) (vectorAccessor.vector())).getOffsetVector().getAccessor();
- }
-
- public void reposition() {
- final int index = baseIndex.vectorIndex();
- final Accessor curAccesssor = accessor();
- final int startPosn = curAccesssor.get(index);
- elementIndex.reset(startPosn, curAccesssor.get(index + 1) - startPosn);
- }
-
- @Override
- public int size() { return elementIndex.size(); }
-
- @Override
- public ScalarElementReader elements() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public ObjectReader entry(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public TupleReader tuple(int index) {
- return entry(index).tuple();
- }
-
- @Override
- public ArrayReader array(int index) {
- return entry(index).array();
- }
-
- @Override
- public ObjectReader entry() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public TupleReader tuple() {
- return entry().tuple();
- }
-
- @Override
- public ArrayReader array() {
- return entry().array();
- }
-}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java
index 59a066e05..2a801795f 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java
@@ -17,18 +17,18 @@
*/
package org.apache.drill.exec.vector.accessor.reader;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.vector.accessor.ArrayReader;
-import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ColumnReader;
import org.apache.drill.exec.vector.accessor.ObjectReader;
-import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ObjectType;
import org.apache.drill.exec.vector.accessor.ScalarReader;
import org.apache.drill.exec.vector.accessor.TupleReader;
public abstract class AbstractObjectReader implements ObjectReader {
- public abstract void bindIndex(ColumnReaderIndex index);
-
- public void reposition() { }
+ @Override
+ public ColumnMetadata schema() { return reader().schema(); }
@Override
public ScalarReader scalar() {
@@ -45,8 +45,13 @@ public abstract class AbstractObjectReader implements ObjectReader {
throw new UnsupportedOperationException();
}
+ public abstract ReaderEvents events();
+
+ public abstract ColumnReader reader();
+
@Override
- public ScalarElementReader elements() {
- throw new UnsupportedOperationException();
- }
+ public boolean isNull() { return reader().isNull(); }
+
+ @Override
+ public ObjectType type() { return reader().type(); }
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractScalarReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractScalarReader.java
new file mode 100644
index 000000000..203de2323
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractScalarReader.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnReader;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.UnsupportedConversionError;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities;
+import org.joda.time.Period;
+
+/**
+ * Base class for scalar column readers. Provides index and null-state
+ * binding, default <tt>getXxx()</tt> methods that reject every Java type
+ * with an {@link UnsupportedConversionError}, and generic
+ * {@link #getObject()} / {@link #getAsString()} implementations that
+ * dispatch on {@link #valueType()}. Subclasses are expected to override
+ * the getters for the Java types they actually support.
+ */
+
+public abstract class AbstractScalarReader implements ScalarReader, ReaderEvents {
+
+  /**
+   * Object-reader wrapper that exposes a scalar reader through the
+   * generic object reader interface.
+   */
+  public static class ScalarObjectReader extends AbstractObjectReader {
+
+    private final AbstractScalarReader scalarReader;
+
+    public ScalarObjectReader(AbstractScalarReader scalarReader) {
+      this.scalarReader = scalarReader;
+    }
+
+    @Override
+    public ScalarReader scalar() {
+      return scalarReader;
+    }
+
+    @Override
+    public Object getObject() {
+      return scalarReader.getObject();
+    }
+
+    @Override
+    public String getAsString() {
+      return scalarReader.getAsString();
+    }
+
+    @Override
+    public ReaderEvents events() { return scalarReader; }
+
+    @Override
+    public ColumnReader reader() { return scalarReader; }
+  }
+
+  /**
+   * Reader that always reports a null value. It has value type
+   * {@link ValueType#NULL}, ignores index binding, and carries only the
+   * column metadata handed to its constructor.
+   */
+  public static class NullReader extends AbstractScalarReader {
+
+    protected final ColumnMetadata schema;
+
+    protected NullReader(ColumnMetadata schema) {
+      this.schema = schema;
+    }
+
+    @Override
+    public ValueType valueType() { return ValueType.NULL; }
+
+    @Override
+    public boolean isNull() { return true; }
+
+    // No vector and no null-state reader to bind: deliberately a no-op.
+    @Override
+    public void bindIndex(ColumnReaderIndex rowIndex) { }
+
+    @Override
+    public ColumnMetadata schema() { return schema; }
+  }
+
+  protected ColumnReaderIndex vectorIndex;
+  protected NullStateReader nullStateReader;
+
+  /**
+   * Create an object reader that wraps a {@link NullReader} for the
+   * given column.
+   *
+   * @param schema metadata of the column
+   * @return object reader whose scalar always reports null
+   */
+  public static ScalarObjectReader nullReader(ColumnMetadata schema) {
+    return new ScalarObjectReader(new NullReader(schema));
+  }
+
+  /**
+   * Bind the row index to this reader and to its null-state reader.
+   * The null-state reader is dereferenced unconditionally, so
+   * {@link #bindNullState(NullStateReader)} must have been called first.
+   *
+   * @param rowIndex index that identifies the current position
+   */
+  @Override
+  public void bindIndex(ColumnReaderIndex rowIndex) {
+    vectorIndex = rowIndex;
+    nullStateReader.bindIndex(rowIndex);
+  }
+
+  @Override
+  public void bindNullState(NullStateReader nullStateReader) {
+    this.nullStateReader = nullStateReader;
+  }
+
+  @Override
+  public ObjectType type() { return ObjectType.SCALAR; }
+
+  @Override
+  public NullStateReader nullStateReader() { return nullStateReader; }
+
+  @Override
+  public void reposition() { }
+
+  @Override
+  public boolean isNull() {
+    return nullStateReader.isNull();
+  }
+
+  /**
+   * Build the error thrown by the default <tt>getXxx()</tt> methods.
+   * This class is a reader, so the failure is reported as a read
+   * (Drill type to Java type) conversion error.
+   *
+   * @param javaType name of the Java type the caller requested
+   * @return the error, ready to be thrown
+   */
+  protected UnsupportedConversionError conversionError(String javaType) {
+    return UnsupportedConversionError.readError(schema(), javaType);
+  }
+
+  @Override
+  public int getInt() {
+    throw conversionError("int");
+  }
+
+  @Override
+  public long getLong() {
+    throw conversionError("long");
+  }
+
+  @Override
+  public double getDouble() {
+    throw conversionError("double");
+  }
+
+  @Override
+  public String getString() {
+    throw conversionError("String");
+  }
+
+  @Override
+  public byte[] getBytes() {
+    throw conversionError("bytes");
+  }
+
+  @Override
+  public BigDecimal getDecimal() {
+    throw conversionError("Decimal");
+  }
+
+  @Override
+  public Period getPeriod() {
+    throw conversionError("Period");
+  }
+
+  /**
+   * Return the current value as a Java object chosen by
+   * {@link #valueType()}; primitive values are boxed.
+   *
+   * @return the value, or <tt>null</tt> if the column value is null
+   * @throws IllegalStateException if the value type has no object form
+   */
+  @Override
+  public Object getObject() {
+    if (isNull()) {
+      return null;
+    }
+    switch (valueType()) {
+    case BYTES:
+      return getBytes();
+    case DECIMAL:
+      return getDecimal();
+    case DOUBLE:
+      return getDouble();
+    case INTEGER:
+      return getInt();
+    case LONG:
+      return getLong();
+    case PERIOD:
+      return getPeriod();
+    case STRING:
+      return getString();
+    default:
+      throw new IllegalStateException("Unexpected type: " + valueType());
+    }
+  }
+
+  /**
+   * Return a display form of the current value. Strings are wrapped in
+   * double quotes; a null value is rendered as the text <tt>null</tt>.
+   *
+   * @return the value formatted as a string
+   * @throws IllegalArgumentException if the value type has no string form
+   */
+  @Override
+  public String getAsString() {
+    if (isNull()) {
+      return "null";
+    }
+    switch (valueType()) {
+    case BYTES:
+      return AccessorUtilities.bytesToString(getBytes());
+    case DOUBLE:
+      return Double.toString(getDouble());
+    case INTEGER:
+      return Integer.toString(getInt());
+    case LONG:
+      return Long.toString(getLong());
+    case STRING:
+      return "\"" + getString() + "\"";
+    case DECIMAL:
+      return getDecimal().toPlainString();
+    case PERIOD:
+      return getPeriod().normalizedStandard().toString();
+    default:
+      throw new IllegalArgumentException("Unsupported type " + valueType());
+    }
+  }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java
index 0429f3e71..2c09e5b55 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java
@@ -20,12 +20,11 @@ package org.apache.drill.exec.vector.accessor.reader;
import java.util.ArrayList;
import java.util.List;
-import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.exec.vector.accessor.ColumnReader;
import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
import org.apache.drill.exec.vector.accessor.ObjectReader;
import org.apache.drill.exec.vector.accessor.ObjectType;
-import org.apache.drill.exec.vector.accessor.ScalarElementReader;
import org.apache.drill.exec.vector.accessor.ScalarReader;
import org.apache.drill.exec.vector.accessor.TupleReader;
@@ -34,26 +33,15 @@ import org.apache.drill.exec.vector.accessor.TupleReader;
* column using either a name or a numeric index.
*/
-public abstract class AbstractTupleReader implements TupleReader {
+public abstract class AbstractTupleReader implements TupleReader, ReaderEvents {
public static class TupleObjectReader extends AbstractObjectReader {
- private AbstractTupleReader tupleReader;
+ private final AbstractTupleReader tupleReader;
public TupleObjectReader(AbstractTupleReader tupleReader) {
this.tupleReader = tupleReader;
}
-
- @Override
- public void bindIndex(ColumnReaderIndex index) {
- tupleReader.bindIndex(index);
- }
-
- @Override
- public ObjectType type() {
- return ObjectType.TUPLE;
- }
-
@Override
public TupleReader tuple() {
return tupleReader;
@@ -70,30 +58,42 @@ public abstract class AbstractTupleReader implements TupleReader {
}
@Override
- public void reposition() {
- tupleReader.reposition();
- }
+ public ReaderEvents events() { return tupleReader; }
+
+ @Override
+ public ColumnReader reader() { return tupleReader; }
}
- protected final TupleMetadata schema;
private final AbstractObjectReader readers[];
+ protected NullStateReader nullStateReader;
- protected AbstractTupleReader(TupleMetadata schema, AbstractObjectReader readers[]) {
- this.schema = schema;
+ protected AbstractTupleReader(AbstractObjectReader readers[]) {
this.readers = readers;
}
+ @Override
+ public ObjectType type() { return ObjectType.TUPLE; }
+
+ @Override
public void bindIndex(ColumnReaderIndex index) {
for (int i = 0; i < readers.length; i++) {
- readers[i].bindIndex(index);
+ readers[i].events().bindIndex(index);
}
}
@Override
- public TupleMetadata schema() { return schema; }
+ public void bindNullState(NullStateReader nullStateReader) {
+ this.nullStateReader = nullStateReader;
+ }
+
+ @Override
+ public NullStateReader nullStateReader() { return nullStateReader; }
@Override
- public int columnCount() { return schema().size(); }
+ public boolean isNull() { return nullStateReader.isNull(); }
+
+ @Override
+ public int columnCount() { return tupleSchema().size(); }
@Override
public ObjectReader column(int colIndex) {
@@ -102,13 +102,23 @@ public abstract class AbstractTupleReader implements TupleReader {
@Override
public ObjectReader column(String colName) {
- int index = schema.index(colName);
+ int index = tupleSchema().index(colName);
if (index == -1) {
return null; }
return readers[index];
}
@Override
+ public ObjectType type(int colIndex) {
+ return column(colIndex).type();
+ }
+
+ @Override
+ public ObjectType type(String colName) {
+ return column(colName).type();
+ }
+
+ @Override
public ScalarReader scalar(int colIndex) {
return column(colIndex).scalar();
}
@@ -139,28 +149,9 @@ public abstract class AbstractTupleReader implements TupleReader {
}
@Override
- public ObjectType type(int colIndex) {
- return column(colIndex).type();
- }
-
- @Override
- public ObjectType type(String colName) {
- return column(colName).type();
- }
-
- @Override
- public ScalarElementReader elements(int colIndex) {
- return column(colIndex).elements();
- }
-
- @Override
- public ScalarElementReader elements(String colName) {
- return column(colName).elements();
- }
-
public void reposition() {
for (int i = 0; i < columnCount(); i++) {
- readers[i].reposition();
+ readers[i].events().reposition();
}
}
@@ -176,14 +167,14 @@ public abstract class AbstractTupleReader implements TupleReader {
@Override
public String getAsString() {
StringBuilder buf = new StringBuilder();
- buf.append("(");
+ buf.append("{");
for (int i = 0; i < columnCount(); i++) {
if (i > 0) {
buf.append( ", " );
}
buf.append(readers[i].getAsString());
}
- buf.append(")");
+ buf.append("}");
return buf.toString();
}
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ArrayReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ArrayReaderImpl.java
new file mode 100644
index 000000000..7f2bf39ad
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ArrayReaderImpl.java
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.exec.vector.accessor.ColumnReader;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectReader;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+
+/**
+ * Reader for an array-valued column. This reader provides access to specific
+ * array members via an array index. This class implements all arrays. The
+ * behavior for specific array types (scalar, map, lists, etc.) is provided
+ * through composition.
+ */
+
+public class ArrayReaderImpl implements ArrayReader, ReaderEvents {
+
+ /**
+ * Object representation of an array reader.
+ */
+
+ public static class ArrayObjectReader extends AbstractObjectReader {
+
+ private ArrayReaderImpl arrayReader;
+
+ public ArrayObjectReader(ArrayReaderImpl arrayReader) {
+ this.arrayReader = arrayReader;
+ }
+
+ @Override
+ public ArrayReader array() {
+ return arrayReader;
+ }
+
+ @Override
+ public Object getObject() {
+ return arrayReader.getObject();
+ }
+
+ @Override
+ public String getAsString() {
+ return arrayReader.getAsString();
+ }
+
+ @Override
+ public ReaderEvents events() { return arrayReader; }
+
+ @Override
+ public ColumnReader reader() { return arrayReader; }
+ }
+
+ /**
+ * Index into the vector of elements for a repeated vector.
+ * Indexes elements relative to the array. That is, if an array
+ * has five elements, the index here tracks elements 0..4.
+ * The actual vector index is given as the start offset plus the
+ * offset into the array.
+ * <p>
+ * Indexes allow random or sequential access. Random access is more
+ * typical for scalar arrays, while sequential access can be more convenient
+ * for tuple arrays.
+ */
+
+ public static class ElementReaderIndex implements ColumnReaderIndex {
+ protected final ColumnReaderIndex base;
+ protected int startOffset;
+ protected int length;
+ protected int position;
+
+ public ElementReaderIndex(ColumnReaderIndex base) {
+ this.base = base;
+ }
+
+ @Override
+ public int hyperVectorIndex() { return 0; }
+
+ /**
+ * Reposition this array index for a new array given the array start
+ * offset and length.
+ *
+ * @param startOffset first location within the array's
+ * offset vector
+ * @param length number of offset vector locations associated with this
+ * array
+ */
+
+ public void reset(int startOffset, int length) {
+ assert length >= 0;
+ assert startOffset >= 0;
+ this.startOffset = startOffset;
+ this.length = length;
+ position = -1;
+ }
+
+ public void rewind() {
+ position = -1;
+ }
+
+ @Override
+ public int size() { return length; }
+
+ /**
+ * Using the current 0-based position relative to the current array, return an
+ * absolute offset vector location for the array value.
+ *
+ * @return absolute offset vector location for the array value
+ * @throws IndexOutOfBoundsException if the position is outside the current array
+ */
+
+ @Override
+ public int offset() {
+ if (position < 0 || length <= position) {
+ throw new IndexOutOfBoundsException("Index = " + position + ", length = " + length);
+ }
+ return startOffset + position;
+ }
+
+ @Override
+ public boolean next() {
+ if (++position < length) {
+ return true;
+ }
+ position = length;
+ return false;
+ }
+
+ /**
+ * Set the current iterator location to the given index offset.
+ *
+ * @param index 0-based index into the current array
+ */
+
+ public void set(int index) {
+ if (index < 0 || length < index) {
+ throw new IndexOutOfBoundsException("Index = " + index + ", length = " + length);
+ }
+ position = index;
+ }
+
+ @Override
+ public int logicalIndex() { return position; }
+ }
+
+ private final ColumnMetadata schema;
+ private final VectorAccessor arrayAccessor;
+ private final OffsetVectorReader offsetReader;
+ private final AbstractObjectReader elementReader;
+ protected ElementReaderIndex elementIndex;
+ protected NullStateReader nullStateReader;
+
+ public ArrayReaderImpl(ColumnMetadata schema, VectorAccessor va,
+ AbstractObjectReader elementReader) {
+ this.schema = schema;
+ arrayAccessor = va;
+ this.elementReader = elementReader;
+ offsetReader = new OffsetVectorReader(
+ VectorAccessors.arrayOffsetVectorAccessor(va));
+ }
+
+ /**
+ * Build a scalar array for a Repeated type. Such arrays are not nullable.
+ *
+ * @param arrayAccessor vector accessor for the repeated vector that holds
+ * the scalar values
+ * @param elementReader scalar reader used to decode each scalar value
+ * @return object reader which wraps the scalar array reader
+ */
+
+ public static ArrayObjectReader buildScalar(ColumnMetadata schema,
+ VectorAccessor arrayAccessor,
+ BaseScalarReader elementReader) {
+
+ // Reader is bound to the data (values) vector inside the repeated vector.
+
+ elementReader.bindVector(schema,
+ VectorAccessors.arrayDataAccessor(arrayAccessor));
+
+ // The scalar array element can't be null.
+
+ elementReader.bindNullState(NullStateReaders.REQUIRED_STATE_READER);
+
+ // Create the array, giving it an offset vector reader based on the
+ // repeated vector's offset vector.
+
+ ArrayReaderImpl arrayReader = new ArrayReaderImpl(schema, arrayAccessor,
+ new AbstractScalarReader.ScalarObjectReader(elementReader));
+
+ // The array itself can't be null.
+
+ arrayReader.bindNullState(NullStateReaders.REQUIRED_STATE_READER);
+
+ // Wrap it all in an object reader.
+
+ return new ArrayObjectReader(arrayReader);
+ }
+
+ /**
+ * Build a repeated map reader.
+ *
+ * @param arrayAccessor vector accessor for the repeated map vector
+ * @param elementReader tuple reader that provides access to each
+ * tuple in the array
+ * @return object reader that wraps the map array reader
+ */
+
+ public static AbstractObjectReader buildTuple(ColumnMetadata schema,
+ VectorAccessor arrayAccessor,
+ AbstractObjectReader elementReader) {
+
+ // Create the array reader over the map vector.
+
+ ArrayReaderImpl arrayReader = new ArrayReaderImpl(schema, arrayAccessor, elementReader);
+
+ // The array itself can't be null.
+
+ arrayReader.bindNullState(NullStateReaders.REQUIRED_STATE_READER);
+
+ // Wrap it all in an object reader.
+
+ return new ArrayObjectReader(arrayReader);
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ arrayAccessor.bind(index);
+ offsetReader.bindIndex(index);
+ nullStateReader.bindIndex(index);
+ elementIndex = new ElementReaderIndex(index);
+ elementReader.events().bindIndex(elementIndex);
+ }
+
+ @Override
+ public void bindNullState(NullStateReader nullStateReader) {
+ this.nullStateReader = nullStateReader;
+ }
+
+ @Override
+ public ObjectType type() { return ObjectType.ARRAY; }
+
+ @Override
+ public ColumnMetadata schema() { return schema; }
+
+ @Override
+ public NullStateReader nullStateReader() { return nullStateReader; }
+
+ @Override
+ public boolean isNull() { return nullStateReader.isNull(); }
+
+ @Override
+ public void reposition() {
+ long entry = offsetReader.getEntry();
+ elementIndex.reset((int) (entry >> 32), (int) (entry & 0xFFFF_FFFF));
+ }
+
+ @Override
+ public boolean next() {
+ if (! elementIndex.next()) {
+ return false;
+ }
+ elementReader.events().reposition();
+ return true;
+ }
+
+ public ColumnReaderIndex elementIndex() { return elementIndex; }
+
+ @Override
+ public int size() { return elementIndex.size(); }
+
+ @Override
+ public void setPosn(int posn) {
+ elementIndex.set(posn);
+ elementReader.events().reposition();
+ }
+
+ @Override
+ public void rewind() {
+ elementIndex.rewind();
+ }
+
+ @Override
+ public ObjectReader entry() { return elementReader; }
+
+ @Override
+ public ObjectType entryType() { return elementReader.type(); }
+
+ @Override
+ public ScalarReader scalar() {
+ return entry().scalar();
+ }
+
+ @Override
+ public TupleReader tuple() {
+ return entry().tuple();
+ }
+
+ @Override
+ public ArrayReader array() {
+ return entry().array();
+ }
+
+ @Override
+ public Object getObject() {
+
+ // Simple: return elements as an object list.
+ // If really needed, could return as a typed array, but that
+ // is a bit of a hassle.
+
+ rewind();
+ List<Object> elements = new ArrayList<>();
+ while (next()) {
+ elements.add(elementReader.getObject());
+ }
+ return elements;
+ }
+
+ @Override
+ public String getAsString() {
+ if (isNull()) {
+ return "null";
+ }
+ rewind();
+ StringBuilder buf = new StringBuilder();
+ buf.append("[");
+ int i = 0;
+ while (next()) {
+ if (i++ > 0) {
+ buf.append( ", " );
+ }
+ buf.append(elementReader.getAsString());
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java
deleted file mode 100644
index f32c101c2..000000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.reader;
-
-import java.math.BigDecimal;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
-import org.apache.drill.exec.vector.accessor.ObjectType;
-import org.apache.drill.exec.vector.accessor.ScalarElementReader;
-import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities;
-import org.joda.time.Period;
-
-public abstract class BaseElementReader implements ScalarElementReader {
-
- public static class ScalarElementObjectReader extends AbstractObjectReader {
-
- private BaseElementReader elementReader;
-
- public ScalarElementObjectReader(BaseElementReader elementReader) {
- this.elementReader = elementReader;
- }
-
- @Override
- public void bindIndex(ColumnReaderIndex index) {
- elementReader.bindIndex((ElementReaderIndex) index);
- }
-
- @Override
- public ObjectType type() {
- return ObjectType.SCALAR;
- }
-
- @Override
- public ScalarElementReader elements() {
- return elementReader;
- }
-
- @Override
- public Object getObject() {
- // Simple: return elements as an object list.
- // If really needed, could return as a typed array, but that
- // is a bit of a hassle.
-
- List<Object> elements = new ArrayList<>();
- for (int i = 0; i < elementReader.size(); i++) {
- elements.add(elementReader.getObject(i));
- }
- return elements;
- }
-
- @Override
- public String getAsString() {
- StringBuilder buf = new StringBuilder();
- buf.append("[");
- for (int i = 0; i < elementReader.size(); i++) {
- if (i > 0) {
- buf.append( ", " );
- }
- buf.append(elementReader.getAsString(i));
- }
- buf.append("]");
- return buf.toString();
- }
- }
-
- protected ElementReaderIndex vectorIndex;
- protected VectorAccessor vectorAccessor;
-
- public abstract void bindVector(ValueVector vector);
-
- public void bindVector(MajorType majorType, VectorAccessor va) {
- vectorAccessor = va;
- }
-
- protected void bindIndex(ElementReaderIndex rowIndex) {
- this.vectorIndex = rowIndex;
- }
-
- @Override
- public int size() { return vectorIndex.size(); }
-
- @Override
- public Object getObject(int index) {
- if (isNull(index)) {
- return "null";
- }
- switch (valueType()) {
- case BYTES:
- return getBytes(index);
- case DECIMAL:
- return getDecimal(index);
- case DOUBLE:
- return getDouble(index);
- case INTEGER:
- return getInt(index);
- case LONG:
- return getLong(index);
- case PERIOD:
- return getPeriod(index);
- case STRING:
- return getString(index);
- default:
- throw new IllegalStateException("Unexpected type: " + valueType());
- }
- }
-
- @Override
- public String getAsString(int index) {
- switch (valueType()) {
- case BYTES:
- return AccessorUtilities.bytesToString(getBytes(index));
- case DOUBLE:
- return Double.toString(getDouble(index));
- case INTEGER:
- return Integer.toString(getInt(index));
- case LONG:
- return Long.toString(getLong(index));
- case STRING:
- return "\"" + getString(index) + "\"";
- case DECIMAL:
- return getDecimal(index).toPlainString();
- case PERIOD:
- return getPeriod(index).normalizedStandard().toString();
- default:
- throw new IllegalArgumentException("Unsupported type " + valueType());
- }
- }
-
- @Override
- public boolean isNull(int index) {
- return false;
- }
-
- @Override
- public int getInt(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long getLong(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public double getDouble(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public String getString(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public byte[] getBytes(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public BigDecimal getDecimal(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Period getPeriod(int index) {
- throw new UnsupportedOperationException();
- }
-}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java
index fb9a71160..279fb58d0 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java
@@ -17,15 +17,11 @@
*/
package org.apache.drill.exec.vector.accessor.reader;
-import java.math.BigDecimal;
-
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.BaseDataValueVector;
import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
-import org.apache.drill.exec.vector.accessor.ObjectType;
-import org.apache.drill.exec.vector.accessor.ScalarReader;
-import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities;
-import org.joda.time.Period;
+
+import io.netty.buffer.DrillBuf;
/**
* Column reader implementation that acts as the basis for the
@@ -34,156 +30,122 @@ import org.joda.time.Period;
* method(s).
*/
-public abstract class BaseScalarReader implements ScalarReader {
+public abstract class BaseScalarReader extends AbstractScalarReader {
+
+ public abstract static class BaseFixedWidthReader extends BaseScalarReader {
- public static class ScalarObjectReader extends AbstractObjectReader {
+ public abstract int width();
+ }
- private BaseScalarReader scalarReader;
+ public abstract static class BaseVarWidthReader extends BaseScalarReader {
- public ScalarObjectReader(BaseScalarReader scalarReader) {
- this.scalarReader = scalarReader;
- }
+ protected OffsetVectorReader offsetsReader;
@Override
- public void bindIndex(ColumnReaderIndex index) {
- scalarReader.bindIndex(index);
+ public void bindVector(ColumnMetadata schema, VectorAccessor va) {
+ super.bindVector(schema, va);
+ offsetsReader = new OffsetVectorReader(
+ VectorAccessors.varWidthOffsetVectorAccessor(va));
}
@Override
- public ObjectType type() {
- return ObjectType.SCALAR;
+ public void bindIndex(ColumnReaderIndex index) {
+ super.bindIndex(index);
+ offsetsReader.bindIndex(index);
}
+ }
- @Override
- public ScalarReader scalar() {
- return scalarReader;
+ /**
+ * Provide access to the DrillBuf for the data vector.
+ */
+
+ public interface BufferAccessor {
+ DrillBuf buffer();
+ }
+
+ private static class SingleVectorBufferAccessor implements BufferAccessor {
+ private final DrillBuf buffer;
+
+ public SingleVectorBufferAccessor(VectorAccessor va) {
+ BaseDataValueVector vector = va.vector();
+ buffer = vector.getBuffer();
}
@Override
- public Object getObject() {
- return scalarReader.getObject();
+ public DrillBuf buffer() { return buffer; }
+ }
+
+ private static class HyperVectorBufferAccessor implements BufferAccessor {
+ private final VectorAccessor vectorAccessor;
+
+ public HyperVectorBufferAccessor(VectorAccessor va) {
+ vectorAccessor = va;
}
@Override
- public String getAsString() {
- return scalarReader.getAsString();
+ public DrillBuf buffer() {
+ BaseDataValueVector vector = vectorAccessor.vector();
+ return vector.getBuffer();
}
}
- protected ColumnReaderIndex vectorIndex;
+ protected ColumnMetadata schema;
protected VectorAccessor vectorAccessor;
+ protected BufferAccessor bufferAccessor;
- public static ScalarObjectReader build(ValueVector vector, BaseScalarReader reader) {
- reader.bindVector(vector);
- return new ScalarObjectReader(reader);
- }
+ public static ScalarObjectReader buildOptional(ColumnMetadata schema,
+ VectorAccessor va, BaseScalarReader reader) {
- public static AbstractObjectReader build(MajorType majorType, VectorAccessor va,
- BaseScalarReader reader) {
- reader.bindVector(majorType, va);
- return new ScalarObjectReader(reader);
- }
+ // Reader is bound to the values vector inside the nullable vector.
- public abstract void bindVector(ValueVector vector);
+ reader.bindVector(schema, VectorAccessors.nullableValuesAccessor(va));
- protected void bindIndex(ColumnReaderIndex rowIndex) {
- this.vectorIndex = rowIndex;
- if (vectorAccessor != null) {
- vectorAccessor.bind(rowIndex);
- }
- }
+ // The nullability of each value depends on the "bits" vector
+ // in the nullable vector.
- public void bindVector(MajorType majorType, VectorAccessor va) {
- vectorAccessor = va;
- }
+ reader.bindNullState(new NullStateReaders.NullableIsSetVectorStateReader(va));
- @Override
- public Object getObject() {
- if (isNull()) {
- return null;
- }
- switch (valueType()) {
- case BYTES:
- return getBytes();
- case DECIMAL:
- return getDecimal();
- case DOUBLE:
- return getDouble();
- case INTEGER:
- return getInt();
- case LONG:
- return getLong();
- case PERIOD:
- return getPeriod();
- case STRING:
- return getString();
- default:
- throw new IllegalStateException("Unexpected type: " + valueType());
- }
- }
+ // Wrap the reader in an object reader.
- @Override
- public String getAsString() {
- if (isNull()) {
- return "null";
- }
- switch (valueType()) {
- case BYTES:
- return AccessorUtilities.bytesToString(getBytes());
- case DOUBLE:
- return Double.toString(getDouble());
- case INTEGER:
- return Integer.toString(getInt());
- case LONG:
- return Long.toString(getLong());
- case STRING:
- return "\"" + getString() + "\"";
- case DECIMAL:
- return getDecimal().toPlainString();
- case PERIOD:
- return getPeriod().normalizedStandard().toString();
- default:
- throw new IllegalArgumentException("Unsupported type " + valueType());
- }
+ return new ScalarObjectReader(reader);
}
- @Override
- public boolean isNull() {
- return false;
- }
+ public static ScalarObjectReader buildRequired(ColumnMetadata schema,
+ VectorAccessor va, BaseScalarReader reader) {
- @Override
- public int getInt() {
- throw new UnsupportedOperationException();
- }
+ // Reader is bound directly to the required vector.
- @Override
- public long getLong() {
- throw new UnsupportedOperationException();
- }
+ reader.bindVector(schema, va);
- @Override
- public double getDouble() {
- throw new UnsupportedOperationException();
+ // The reader is required, values can't be null.
+
+ reader.bindNullState(NullStateReaders.REQUIRED_STATE_READER);
+
+ // Wrap the reader in an object reader.
+
+ return new ScalarObjectReader(reader);
}
- @Override
- public String getString() {
- throw new UnsupportedOperationException();
+ public void bindVector(ColumnMetadata schema, VectorAccessor va) {
+ this.schema = schema;
+ vectorAccessor = va;
+ bufferAccessor = bufferAccessor(va);
}
- @Override
- public byte[] getBytes() {
- throw new UnsupportedOperationException();
+ protected BufferAccessor bufferAccessor(VectorAccessor va) {
+ if (va.isHyper()) {
+ return new HyperVectorBufferAccessor(va);
+ } else {
+ return new SingleVectorBufferAccessor(va);
+ }
}
@Override
- public BigDecimal getDecimal() {
- throw new UnsupportedOperationException();
+ public void bindIndex(ColumnReaderIndex rowIndex) {
+ super.bindIndex(rowIndex);
+ vectorAccessor.bind(rowIndex);
}
@Override
- public Period getPeriod() {
- throw new UnsupportedOperationException();
- }
+ public ColumnMetadata schema() { return schema; }
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
index 0bcb6e291..ae15e5d7a 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
@@ -17,12 +17,9 @@
*/
package org.apache.drill.exec.vector.accessor.reader;
-import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.ColumnAccessors;
-import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnAccessorUtils;
/**
* Gather generated reader classes into a set of class tables to allow rapid
@@ -35,19 +32,14 @@ public class ColumnReaderFactory {
private static final int typeCount = MinorType.values().length;
private static final Class<? extends BaseScalarReader> requiredReaders[] = new Class[typeCount];
- private static final Class<? extends BaseScalarReader> nullableReaders[] = new Class[typeCount];
- private static final Class<? extends BaseElementReader> elementReaders[] = new Class[typeCount];
static {
- ColumnAccessors.defineRequiredReaders(requiredReaders);
- ColumnAccessors.defineNullableReaders(nullableReaders);
- ColumnAccessors.defineArrayReaders(elementReaders);
+ ColumnAccessorUtils.defineRequiredReaders(requiredReaders);
}
- public static AbstractObjectReader buildColumnReader(ValueVector vector) {
- MajorType major = vector.getField().getType();
+ public static BaseScalarReader buildColumnReader(VectorAccessor va) {
+ MajorType major = va.type();
MinorType type = major.getMinorType();
- DataMode mode = major.getMode();
switch (type) {
case GENERIC_OBJECT:
@@ -57,41 +49,7 @@ public class ColumnReaderFactory {
case MAP:
throw new UnsupportedOperationException(type.toString());
default:
- switch (mode) {
- case OPTIONAL:
- return BaseScalarReader.build(vector, newAccessor(type, nullableReaders));
- case REQUIRED:
- return BaseScalarReader.build(vector, newAccessor(type, requiredReaders));
- case REPEATED:
- return ScalarArrayReader.build((RepeatedValueVector) vector, newAccessor(type, elementReaders));
- default:
- throw new UnsupportedOperationException(mode.toString());
- }
- }
- }
-
- public static AbstractObjectReader buildColumnReader(MajorType majorType, VectorAccessor va) {
- MinorType type = majorType.getMinorType();
- DataMode mode = majorType.getMode();
-
- switch (type) {
- case GENERIC_OBJECT:
- case LATE:
- case NULL:
- case LIST:
- case MAP:
- throw new UnsupportedOperationException(type.toString());
- default:
- switch (mode) {
- case OPTIONAL:
- return BaseScalarReader.build(majorType, va, newAccessor(type, nullableReaders));
- case REQUIRED:
- return BaseScalarReader.build(majorType, va, newAccessor(type, requiredReaders));
- case REPEATED:
- return ScalarArrayReader.build(majorType, va, newAccessor(type, elementReaders));
- default:
- throw new UnsupportedOperationException(mode.toString());
- }
+ return newAccessor(type, requiredReaders);
}
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java
index 900e0a77c..732939147 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java
@@ -20,6 +20,8 @@ package org.apache.drill.exec.vector.accessor.reader;
import java.util.List;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
/**
* Reader for a Drill Map type. Maps are actually tuples, just like rows.
@@ -27,17 +29,54 @@ import org.apache.drill.exec.record.metadata.ColumnMetadata;
public class MapReader extends AbstractTupleReader {
+ protected final ColumnMetadata schema;
+
+ /**
+ * Accessor for the map vector. This class does not use the map vector
+ * directly. However, in the case of a map hyper-vector, we need to
+ * tell the vector which batch to use. (For an array, the array does
+ * this work and the map accessor is null.)
+ */
+
+ private final VectorAccessor mapAccessor;
+
protected MapReader(ColumnMetadata schema, AbstractObjectReader readers[]) {
- super(schema.mapSchema(), readers);
+ this(schema, null, readers);
+ }
+
+ protected MapReader(ColumnMetadata schema,
+ VectorAccessor mapAccessor, AbstractObjectReader readers[]) {
+ super(readers);
+ this.schema = schema;
+ this.mapAccessor = mapAccessor;
}
- public static TupleObjectReader build(ColumnMetadata schema, AbstractObjectReader readers[]) {
- return new TupleObjectReader(new MapReader(schema, readers));
+ public static TupleObjectReader build(ColumnMetadata schema,
+ VectorAccessor mapAccessor,
+ AbstractObjectReader readers[]) {
+ MapReader mapReader = new MapReader(schema, mapAccessor, readers);
+ mapReader.bindNullState(NullStateReaders.REQUIRED_STATE_READER);
+ return new TupleObjectReader(mapReader);
}
- public static AbstractObjectReader build(ColumnMetadata metadata,
+ public static AbstractObjectReader build(ColumnMetadata schema,
+ VectorAccessor mapAccessor,
List<AbstractObjectReader> readers) {
AbstractObjectReader readerArray[] = new AbstractObjectReader[readers.size()];
- return build(metadata, readers.toArray(readerArray));
+ return build(schema, mapAccessor, readers.toArray(readerArray));
}
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ if (mapAccessor != null) {
+ mapAccessor.bind(index);
+ }
+ super.bindIndex(index);
+ }
+
+ @Override
+ public ColumnMetadata schema() { return schema; }
+
+ @Override
+ public TupleMetadata tupleSchema() { return schema.mapSchema(); }
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReader.java
new file mode 100644
index 000000000..e52c0f52e
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReader.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+
+/**
+ * Internal mechanism to detect if a value is null. Handles the multiple ways
+ * that Drill represents nulls:
+ * <ul>
+ * <li>Required and repeated modes: value is never null.</li>
+ * <li>Optional mode: null state is carried by an associated "bits" vector.</li>
+ * <li>Union: null state is carried by <i>both</i> the bits state of
+ * the union itself, and the null state of the associated nullable vector.
+ * (The union states if the column value itself is null; the vector state is
+ * if that value is null, which will occur if either the column is null, or
+ * if the type of the column is something other than the type in question.)</li>
+ * <li>List, with a single data vector: null state is carried by the list vector
+ * and the associated nullable data vector. Presumably the list vector state
+ * takes precedence.</li>
+ * <li>List, with a union data vector (AKA variant array or union array): the
+ * null state is carried by all three of a) the list vector, b) the union
+ * vector, and c) the type vectors. Presumably, the list vector state has
+ * precedence.</li>
+ * </ul>
+ * <p>
+ * The interface here allows each reader to delegate the null logic to a
+ * separate component, keeping the data access portion itself simple.
+ * <p>
+ * As with all readers, this reader must handle both the single-batch and
+ * the hyper-batch cases.
+ */
+
+public interface NullStateReader {
+ void bindIndex(ColumnReaderIndex rowIndex);
+ boolean isNull();
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReaders.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReaders.java
new file mode 100644
index 000000000..f1ea09a65
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/NullStateReaders.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors.UInt1ColumnReader;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.complex.UnionVector;
+
+public class NullStateReaders {
+
+ public static final RequiredStateReader REQUIRED_STATE_READER = new RequiredStateReader();
+
+ private NullStateReaders() { }
+
+ /**
+ * Dummy implementation of a null state reader for cases in which the
+ * value is never null. Use the {@link NullStateReaders#REQUIRED_STATE_READER} instance
+ * for this case.
+ */
+
+ protected static class RequiredStateReader implements NullStateReader {
+
+ @Override
+ public void bindIndex(ColumnReaderIndex rowIndex) { }
+
+ @Override
+ public boolean isNull() { return false; }
+ }
+
+ /**
+ * Holder for the NullableVector wrapper around a bits vector and a
+ * data vector. Manages the bits vector to extract the nullability
+ * value.
+ * <p>
+ * This class allows the same reader to handle both the required and
+ * nullable cases; the only difference is how nulls are handled.
+ */
+
+ protected static class NullableIsSetVectorStateReader implements NullStateReader {
+
+ private final VectorAccessor nullableAccessor;
+ private final UInt1ColumnReader isSetReader;
+
+ public NullableIsSetVectorStateReader(VectorAccessor nullableAccessor) {
+ this.nullableAccessor = nullableAccessor;
+ isSetReader = new UInt1ColumnReader();
+ isSetReader.bindVector(null,
+ VectorAccessors.nullableBitsAccessor(nullableAccessor));
+ isSetReader.bindNullState(REQUIRED_STATE_READER);
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex rowIndex) {
+ nullableAccessor.bind(rowIndex);
+ isSetReader.bindIndex(rowIndex);
+ }
+
+ @Override
+ public boolean isNull() {
+ return isSetReader.getInt() == 0;
+ }
+ }
+
+ /**
+ * Null state reader driven directly by an is-set ("bits") vector,
+ * presumably that of a list vector (per the class name). Unlike the
+ * nullable-vector case, the bits vector accessor is passed in as-is.
+ * <p>
+ * A value is null when the is-set value at the current index is 0.
+ * The is-set reader itself is bound with the required (never-null) state.
+ */
+
+ protected static class ListIsSetVectorStateReader implements NullStateReader {
+
+ private final VectorAccessor bitsAccessor;
+ private final UInt1ColumnReader isSetReader;
+
+ public ListIsSetVectorStateReader(VectorAccessor bitsAccessor) {
+ this.bitsAccessor = bitsAccessor;
+ isSetReader = new UInt1ColumnReader();
+ isSetReader.bindVector(null, bitsAccessor);
+ isSetReader.bindNullState(REQUIRED_STATE_READER);
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex rowIndex) {
+ bitsAccessor.bind(rowIndex);
+ isSetReader.bindIndex(rowIndex);
+ }
+
+ @Override
+ public boolean isNull() {
+ return isSetReader.getInt() == 0;
+ }
+ }
+
+ /**
+ * Null state that handles the strange union semantics that both
+ * the union and the values can be null. A value is null if either
+ * the union or the value is null. (Though, presumably, in the normal
+ * case either the union is null or one of the associated values is
+ * null.)
+ */
+
+ protected static class MemberNullStateReader implements NullStateReader {
+
+ private final NullStateReader unionNullState;
+ private final NullStateReader memberNullState;
+
+ public MemberNullStateReader(NullStateReader unionNullState, NullStateReader memberNullState) {
+ this.unionNullState = unionNullState;
+ this.memberNullState = memberNullState;
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex rowIndex) {
+ memberNullState.bindIndex(rowIndex);
+ }
+
+ @Override
+ public boolean isNull() {
+ return unionNullState.isNull() || memberNullState.isNull();
+ }
+ }
+
+ /**
+ * Handle the awkward situation with complex types. They don't carry their own
+ * bits (null state) vector. Instead, we define them as null if the type of
+ * the union is other than the type of the map or list. (Since the same vector
+ * that holds state also holds the is-null value, this check includes the
+ * check if the entire union is null.)
+ */
+
+ protected static class ComplexMemberStateReader implements NullStateReader {
+
+ private UInt1ColumnReader typeReader;
+ private MinorType type;
+
+ public ComplexMemberStateReader(UInt1ColumnReader typeReader, MinorType type) {
+ this.typeReader = typeReader;
+ this.type = type;
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex rowIndex) { }
+
+ @Override
+ public boolean isNull() {
+ return typeReader.getInt() != type.getNumber();
+ }
+ }
+
+ /**
+ * Extract null state from the union vector's type vector. The union reader
+ * manages the type reader; bindIndex() here still forwards the index to it.
+ */
+
+ protected static class TypeVectorStateReader implements NullStateReader {
+
+ public final UInt1ColumnReader typeReader;
+
+ public TypeVectorStateReader(UInt1ColumnReader typeReader) {
+ this.typeReader = typeReader;
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex rowIndex) {
+ typeReader.bindIndex(rowIndex);
+ }
+
+ @Override
+ public boolean isNull() {
+ return typeReader.getInt() == UnionVector.NULL_MARKER;
+ }
+ }
+
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java
deleted file mode 100644
index 9ed89f1c7..000000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.reader;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
-import org.apache.drill.exec.vector.accessor.ObjectReader;
-import org.apache.drill.exec.vector.accessor.ObjectType;
-import org.apache.drill.exec.vector.complex.RepeatedValueVector;
-
-/**
- * Reader for an array of either tuples or other arrays.
- */
-
-public class ObjectArrayReader extends AbstractArrayReader {
-
- /**
- * Index into the vector of elements for a repeated vector.
- * Keeps track of the current offset in terms of value positions.
- * This is a derived index. The base index points to an entry
- * in the offset vector for the array. This inner index picks
- * off elements within the range of offsets for that one entry.
- * For example:<pre><code>
- * [ ... 100 105 ...]
- * </code></pre>In the above the value 100 might be at outer
- * offset 5. The inner array will pick off the five values
- * 100...104.
- * <p>
- * Because arrays allow random access on read, the inner offset
- * is reset on each access to the array.
- */
-
- public static class ObjectElementReaderIndex extends BaseElementIndex implements ColumnReaderIndex {
-
- private int posn;
-
- public ObjectElementReaderIndex(ColumnReaderIndex base) {
- super(base);
- }
-
- @Override
- public int vectorIndex() {
- return startOffset + posn;
- }
-
- public void set(int index) {
- if (index < 0 || length <= index) {
- throw new IndexOutOfBoundsException("Index = " + index + ", length = " + length);
- }
- posn = index;
- }
-
- public int posn() { return posn; }
- }
-
- /**
- * Reader for each element.
- */
-
- private final AbstractObjectReader elementReader;
-
- /**
- * Index used to access elements.
- */
-
- private ObjectElementReaderIndex objElementIndex;
-
- private ObjectArrayReader(RepeatedValueVector vector, AbstractObjectReader elementReader) {
- super(vector);
- this.elementReader = elementReader;
- }
-
- private ObjectArrayReader(VectorAccessor vectorAccessor, AbstractObjectReader elementReader) {
- super(vectorAccessor);
- this.elementReader = elementReader;
- }
-
- public static ArrayObjectReader build(RepeatedValueVector vector,
- AbstractObjectReader elementReader) {
- return new ArrayObjectReader(
- new ObjectArrayReader(vector, elementReader));
- }
-
- public static AbstractObjectReader build(VectorAccessor vectorAccessor,
- AbstractObjectReader elementReader) {
- return new ArrayObjectReader(
- new ObjectArrayReader(vectorAccessor, elementReader));
- }
-
- @Override
- public void bindIndex(ColumnReaderIndex index) {
- super.bindIndex(index);
- objElementIndex = new ObjectElementReaderIndex(baseIndex);
- elementIndex = objElementIndex;
- elementReader.bindIndex(objElementIndex);
- }
-
- @Override
- public ObjectType entryType() {
- return elementReader.type();
- }
-
- @Override
- public void setPosn(int index) {
- objElementIndex.set(index);
- elementReader.reposition();
- }
-
- @Override
- public ObjectReader entry() {
- return elementReader;
- }
-
- @Override
- public ObjectReader entry(int index) {
- setPosn(index);
- return entry();
- }
-
- @Override
- public Object getObject() {
- List<Object> array = new ArrayList<>();
- for (int i = 0; i < objElementIndex.size(); i++) {
- array.add(entry(i).getObject());
- }
- return array;
- }
-
- @Override
- public String getAsString() {
- StringBuilder buf = new StringBuilder();
- buf.append("[");
- for (int i = 0; i < size(); i++) {
- if (i > 0) {
- buf.append( ", " );
- }
- buf.append(entry(i).getAsString());
- }
- buf.append("]");
- return buf.toString();
- }
-}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/OffsetVectorReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/OffsetVectorReader.java
new file mode 100644
index 000000000..9d8163816
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/OffsetVectorReader.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader.BaseFixedWidthReader;
+import io.netty.buffer.DrillBuf;
+
+/**
+ * Reader for an offset vector.
+ */
+
+public class OffsetVectorReader extends BaseFixedWidthReader {
+
+ private static final int VALUE_WIDTH = UInt4Vector.VALUE_WIDTH;
+
+ public OffsetVectorReader(VectorAccessor offsetsAccessor) {
+ vectorAccessor = offsetsAccessor;
+ bufferAccessor = bufferAccessor(offsetsAccessor);
+ nullStateReader = NullStateReaders.REQUIRED_STATE_READER;
+ }
+
+ @Override
+ public ValueType valueType() {
+ return ValueType.INTEGER;
+ }
+
+ @Override public int width() { return VALUE_WIDTH; }
+
+ /**
+ * Return the offset and length of a value encoded as a long.
+ * The value is encoded to avoid the need to resolve the offset vector
+ * twice per value.
+ *
+ * @return a long with the format:<br>
+ * Upper 32 bits - offset: <tt>offset = (int) (entry >> 32)</tt><br>
+ * Lower 32 bits - length: <tt>length = (int) (entry & 0xFFFF_FFFFL)</tt>
+ */
+
+ public long getEntry() {
+ final DrillBuf buf = bufferAccessor.buffer();
+ final int readOffset = vectorIndex.offset() * VALUE_WIDTH;
+ long start = buf.getInt(readOffset);
+ long end = buf.getInt(readOffset + VALUE_WIDTH);
+ return (start << 32) + (end - start);
+ }
+
+ @Override
+ public void reposition() { }
+
+ @Override
+ public ColumnMetadata schema() { return null; }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ReaderEvents.java
index 4f3aeeb91..2f759464d 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ReaderEvents.java
@@ -18,21 +18,14 @@
package org.apache.drill.exec.vector.accessor.reader;
import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
-import org.apache.drill.exec.vector.accessor.reader.AbstractArrayReader.BaseElementIndex;
/**
- * Index into the vector of elements for a repeated vector.
- * Keeps track of the current offset in terms of value positions.
+ * Internal operations to wire up a set of readers.
*/
-public class FixedWidthElementReaderIndex extends BaseElementIndex implements ElementReaderIndex {
-
- public FixedWidthElementReaderIndex(ColumnReaderIndex base) {
- super(base);
- }
-
- @Override
- public int vectorIndex(int posn) {
- return elementIndex(posn);
- }
+public interface ReaderEvents {
+ void bindIndex(ColumnReaderIndex rowIndex);
+ void bindNullState(NullStateReader nullStateReader);
+ NullStateReader nullStateReader();
+ void reposition();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java
deleted file mode 100644
index d93e4a599..000000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.reader;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
-import org.apache.drill.exec.vector.accessor.ObjectType;
-import org.apache.drill.exec.vector.accessor.ScalarElementReader;
-import org.apache.drill.exec.vector.complex.RepeatedValueVector;
-
-public class ScalarArrayReader extends AbstractArrayReader {
-
- private final BaseElementReader elementReader;
-
- private ScalarArrayReader(RepeatedValueVector vector,
- BaseElementReader elementReader) {
- super(vector);
- this.elementReader = elementReader;
- }
-
- private ScalarArrayReader(VectorAccessor va,
- BaseElementReader elementReader) {
- super(va);
- this.elementReader = elementReader;
- }
-
- public static ArrayObjectReader build(RepeatedValueVector vector,
- BaseElementReader elementReader) {
- elementReader.bindVector(vector.getDataVector());
- return new ArrayObjectReader(new ScalarArrayReader(vector, elementReader));
- }
-
- public static ArrayObjectReader build(MajorType majorType, VectorAccessor va,
- BaseElementReader elementReader) {
- elementReader.bindVector(majorType, va);
- return new ArrayObjectReader(new ScalarArrayReader(va, elementReader));
- }
-
- @Override
- public void bindIndex(ColumnReaderIndex index) {
- super.bindIndex(index);
- FixedWidthElementReaderIndex fwElementIndex = new FixedWidthElementReaderIndex(baseIndex);
- elementIndex = fwElementIndex;
- elementReader.bindIndex(fwElementIndex);
- }
-
- @Override
- public ObjectType entryType() {
- return ObjectType.SCALAR;
- }
-
- @Override
- public ScalarElementReader elements() {
- return elementReader;
- }
-
- @Override
- public void setPosn(int index) {
- throw new IllegalStateException("setPosn() not supported for scalar arrays");
- }
-
- @Override
- public Object getObject() {
- List<Object> elements = new ArrayList<>();
- for (int i = 0; i < size(); i++) {
- elements.add(elementReader.getObject(i));
- }
- return elements;
- }
-
- @Override
- public String getAsString() {
- StringBuilder buf = new StringBuilder();
- buf.append("[");
- for (int i = 0; i < size(); i++) {
- if (i > 0) {
- buf.append( ", " );
- }
- buf.append(elementReader.getAsString(i));
- }
- buf.append("]");
- return buf.toString();
- }
-}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java
index 1cf2a1965..050845b0a 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java
@@ -17,10 +17,13 @@
*/
package org.apache.drill.exec.vector.accessor.reader;
+import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
public interface VectorAccessor {
+ boolean isHyper();
+ MajorType type();
void bind(ColumnReaderIndex index);
- ValueVector vector();
+ <T extends ValueVector> T vector();
}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessors.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessors.java
new file mode 100644
index 000000000..7d70d1de9
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessors.java
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.vector.NullableVector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VariableWidthVector;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+/**
+ * Collection of vector accessors. A single class handles the single-batch
+ * case. But, for hyper-vectors, we need a separate accessor for each
+ * (vector, sub-vector) combination to handle the
+ * indirections in the hyper-vector case.
+ * <p>
+ * For a required vector:<br>
+ * reader index --> hyper vector --> required vector
+ * <p>
+ * For a nullable vector:<br>
+ * reader index --> hyper vector --> nullable vector<br>
+ * nullable vector --> bits vector<br>
+ * --> values vector
+ * <p>
+ * For a repeated vector:<br>
+ * reader index --> hyper vector --> repeated vector<br>
+ * repeated vector --> offset vector<br>
+ * --> values vector
+ * <p>
+ * And so on. In each case, we must start with a top-level
+ * vector as indicated by the row index, indirected through the
+ * SV4. That is done by the reader index. That points to a
+ * top-level vector in the hyper-vector.
+ * <p>
+ * Most of the vectors needed are nested. These inner vectors
+ * are not part of a hyper-vector list. Instead, we must get the
+ * top-level vector, then navigate down from that vector to the
+ * desired vector.
+ * <p>
+ * Sometimes the navigation is static (the "bits" vector for
+ * a nullable vector.) Other times, it is a bit more dynamic: a
+ * member of a map (given by index) or the member of a union
+ * (given by type.)
+ * <p>
+ * These accessors can be chained to handle deeply-nested
+ * structures such as an array of maps that contains a list of
+ * unions.
+ * <p>
+ * Because the navigation is required on every access, the use of hyper
+ * vectors is slow. Since hyper-vectors are seldom used, we
+ * optimize for the single-batch case by caching vectors at each
+ * stage. Thus, for the single-batch case, we use different accessor
+ * implementations. To keep the rest of the code simple, both the
+ * hyper and single batch cases use the same API, but they use
+ * entirely different implementations. The methods here choose
+ * the correct implementation for the single and hyper cases.
+ */
+
+public class VectorAccessors {
+
+ public static class NullVectorAccesor implements VectorAccessor {
+
+ private final MajorType type;
+
+ public NullVectorAccesor(MajorType type) {
+ this.type = type;
+ }
+
+ @Override
+ public boolean isHyper() { return false; }
+
+ @Override
+ public MajorType type() { return type; }
+
+ @Override
+ public void bind(ColumnReaderIndex index) { }
+
+ @Override
+ public <T extends ValueVector> T vector() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ public static class SingleVectorAccessor implements VectorAccessor {
+
+ private final ValueVector vector;
+
+ public SingleVectorAccessor(ValueVector vector) {
+ this.vector = vector;
+ }
+
+ @Override
+ public boolean isHyper() { return false; }
+
+ @Override
+ public void bind(ColumnReaderIndex index) { }
+
+ @Override
+ public MajorType type() { return vector.getField().getType(); }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T extends ValueVector> T vector() { return (T) vector; }
+ }
+
+ /**
+ * Vector accessor used by the column accessors to obtain the vector for
+ * each column value. That is, position 0 might be batch 4, index 3,
+ * while position 1 might be batch 1, index 7, and so on.
+ */
+
+ public static abstract class BaseHyperVectorAccessor implements VectorAccessor {
+
+ protected final MajorType type;
+
+ public BaseHyperVectorAccessor(MajorType type) {
+ this.type = type;
+ }
+
+ @Override
+ public boolean isHyper() { return true; }
+
+ @Override
+ public void bind(ColumnReaderIndex index) { }
+
+ @Override
+ public MajorType type() { return type; }
+ }
+
+ /**
+ * Vector accessor for RepeatedVector &rarr; offsets vector
+ */
+
+ public static class ArrayOffsetHyperVectorAccessor extends BaseHyperVectorAccessor {
+
+ private VectorAccessor repeatedVectorAccessor;
+
+ public ArrayOffsetHyperVectorAccessor(VectorAccessor va) {
+ super(Types.required(MinorType.UINT4));
+ repeatedVectorAccessor = va;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T extends ValueVector> T vector() {
+ RepeatedValueVector vector = repeatedVectorAccessor.vector();
+ return (T) vector.getOffsetVector();
+ }
+ }
+
+ /**
+ * Vector accessor for RepeatedVector &rarr; data vector
+ */
+
+ public static class ArrayDataHyperVectorAccessor implements VectorAccessor {
+
+ private VectorAccessor repeatedVectorAccessor;
+
+ private ArrayDataHyperVectorAccessor(VectorAccessor va) {
+ repeatedVectorAccessor = va;
+ }
+
+ @Override
+ public boolean isHyper() { return true; }
+
+ @Override
+ public MajorType type() { return repeatedVectorAccessor.type(); }
+
+ @Override
+ public void bind(ColumnReaderIndex index) { }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T extends ValueVector> T vector() {
+ RepeatedValueVector vector = repeatedVectorAccessor.vector();
+ return (T) vector.getDataVector();
+ }
+ }
+
+ /**
+ * Vector accessor for VariableWidthVector &rarr; offsets vector
+ */
+
+ public static class VarWidthOffsetHyperVectorAccessor extends BaseHyperVectorAccessor {
+
+ private VectorAccessor varWidthVectorAccessor;
+
+ public VarWidthOffsetHyperVectorAccessor(VectorAccessor va) {
+ super(Types.required(MinorType.UINT4));
+ varWidthVectorAccessor = va;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T extends ValueVector> T vector() {
+ VariableWidthVector vector = varWidthVectorAccessor.vector();
+ return (T) vector.getOffsetVector();
+ }
+ }
+
+ /**
+ * Vector accessor for NullableVector &rarr; values vector
+ */
+
+ public static class NullableValuesHyperVectorAccessor implements VectorAccessor {
+
+ private VectorAccessor nullableAccessor;
+
+ private NullableValuesHyperVectorAccessor(VectorAccessor va) {
+ nullableAccessor = va;
+ }
+
+ @Override
+ public boolean isHyper() { return true; }
+
+ @Override
+ public MajorType type() { return nullableAccessor.type(); }
+
+ @Override
+ public void bind(ColumnReaderIndex index) { }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T extends ValueVector> T vector() {
+ NullableVector vector = nullableAccessor.vector();
+ return (T) vector.getValuesVector();
+ }
+ }
+
+ /**
+ * Vector accessor for NullableVector &rarr; bits vector
+ */
+
+ public static class NullableBitsHyperVectorStateReader extends BaseHyperVectorAccessor {
+
+ public final VectorAccessor nullableAccessor;
+
+ public NullableBitsHyperVectorStateReader(VectorAccessor nullableAccessor) {
+ super(Types.required(MinorType.UINT1));
+ this.nullableAccessor = nullableAccessor;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T extends ValueVector> T vector() {
+ NullableVector vector = nullableAccessor.vector();
+ return (T) vector.getBitsVector();
+ }
+ }
+
+ /**
+ * Vector accessor for AbstractMapVector &rarr; member vector
+ */
+
+ public static class MapMemberHyperVectorAccessor extends BaseHyperVectorAccessor {
+
+ private final VectorAccessor mapAccessor;
+ private final int index;
+
+ public MapMemberHyperVectorAccessor(VectorAccessor va, int index, MajorType type) {
+ super(type);
+ mapAccessor = va;
+ this.index = index;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public <T extends ValueVector> T vector() {
+ AbstractMapVector vector = mapAccessor.vector();
+ return (T) vector.getChildByOrdinal(index);
+ }
+ }
+
+ // Methods to create vector accessors for sub-vectors internal to various
+ // value vectors. These methods are called from the readers themselves rather
+ // than the reader builders.
+
+ public static VectorAccessor arrayOffsetVectorAccessor(VectorAccessor repeatedAccessor) {
+ if (repeatedAccessor.isHyper()) {
+ return new ArrayOffsetHyperVectorAccessor(repeatedAccessor);
+ } else {
+ RepeatedValueVector vector = repeatedAccessor.vector();
+ return new SingleVectorAccessor(vector.getOffsetVector());
+ }
+ }
+
+ public static VectorAccessor arrayDataAccessor(VectorAccessor repeatedAccessor) {
+ if (repeatedAccessor.isHyper()) {
+ return new ArrayDataHyperVectorAccessor(repeatedAccessor);
+ } else {
+ RepeatedValueVector vector = repeatedAccessor.vector();
+ return new SingleVectorAccessor(
+ vector.getDataVector());
+ }
+ }
+
+ public static VectorAccessor varWidthOffsetVectorAccessor(VectorAccessor varWidthAccessor) {
+ if (varWidthAccessor.isHyper()) {
+ return new VarWidthOffsetHyperVectorAccessor(varWidthAccessor);
+ } else {
+ VariableWidthVector vector = varWidthAccessor.vector();
+ return new SingleVectorAccessor(vector.getOffsetVector());
+ }
+ }
+
+ public static VectorAccessor nullableValuesAccessor(VectorAccessor nullableAccessor) {
+ if (nullableAccessor.isHyper()) {
+ return new NullableValuesHyperVectorAccessor(nullableAccessor);
+ } else {
+ NullableVector vector = nullableAccessor.vector();
+ return new SingleVectorAccessor(
+ vector.getValuesVector());
+ }
+ }
+
+ public static VectorAccessor nullableBitsAccessor(VectorAccessor nullableAccessor) {
+ if (nullableAccessor.isHyper()) {
+ return new NullableBitsHyperVectorStateReader(nullableAccessor);
+ } else {
+ NullableVector vector = nullableAccessor.vector();
+ return new SingleVectorAccessor(
+ vector.getBitsVector());
+ }
+ }
+}
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java
index a94d2e844..823dd2896 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java
@@ -17,10 +17,67 @@
*/
/**
* Provides the reader hierarchy as explained in the API package.
- * The only caveat is that a simplification is provided for arrays of
- * scalar values: rather than a scalar reader for each value, the
- * {#link ScalarElementReader} class provides access to the entire array
- * via indexed get methods.
+ *
+ * <h4>Structure</h4>
+ *
+ * The reader implementation divides into four parts:
+ * <ol>
+ * <li>The readers themselves, which start with scalar readers to
+ * decode data from vectors, then build up to nullable, array,
+ * union and list readers. Readers are built up via composition,
+ * often using the (internal) offset vector reader.</li>
+ * <li>The column index abstraction that steps through items in a collection.
+ * At the top level, the index points to the current row. The top level
+ * may include an indirection (an SV2 or SV4) which is handled by the
+ * column index. Within arrays, the column index points to each element
+ * of the array.</li>
+ * <li>The vector accessor which provides a unified interface for both
+ * the single-batch and hyper-batch cases. The single-batch versions
+ * simply hold onto the vector itself. The hyper-batch versions either
+ * provide access to a specific vector within a hyper-vector (for
+ * top-level vectors), or navigate from a top-level vector down to an
+ * inner vector (for nested vectors).</li>
+ * <li>The null state abstraction which provides a uniform way to
+ * detect nullability. For example, within the reader system, the
+ * reader for nullable and required vectors differ only in the associated
+ * null state reader. Unions and lists have complex null state
+ * logic: the nullability of a value depends on the nullability
+ * of the list, the union, and the value itself. The null state
+ * class implements this logic independent of the reader structure.
+ * </li>
+ * </ol>
+ *
+ * <h4>Composition</h4>
+ *
+ * The result is that reader structure makes heavy use of composition:
+ * readers are built up from each of the above components. The number of
+ * actual reader classes is small, but the methods to build the readers are
+ * complex. Most structure is built at build time. Indexes, however, are
+ * provided at a later "bind" time at which a bind call traverses the
+ * reader tree to associate an index with each reader and vector accessor.
+ * When a reader is for an array, the bind step creates the index for the
+ * array elements.
+ *
+ * <h4>Construction</h4>
+ *
+ * Construction of readers is a multi-part process.
+ * <ul>
+ * <li>Start with a single or hyper-vector batch.</li>
+ * <li>The reader builders in another package parse the batch structure,
+ * create the required metadata, wrap the (single or hyper) vectors in
+ * a vector accessor, and call methods in this package.</li>
+ * <li>Methods here perform the final construction based on the specific
+ * type of the reader.</li>
+ * </ul>
+ * <p>
+ * The work divides into two main categories:
+ * <ul>
+ * <li>The work which is based on
+ * the vector structure and single/hyper-vector structure, which is done
+ * elsewhere.</li>
+ * <li>The work which is based on the structure of the readers (with
+ * vector cardinality factored out), which is done here.</li>
+ * </ul>
*/
package org.apache.drill.exec.vector.accessor.reader; \ No newline at end of file
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
index 30811bb39..4a668c4ed 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
@@ -28,7 +28,7 @@ import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.vector.NullableVector;
import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.ColumnAccessors;
+import org.apache.drill.exec.vector.accessor.ColumnAccessorUtils;
import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.ArrayObjectWriter;
import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter.ScalarObjectWriter;
import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter.TupleObjectWriter;
@@ -56,7 +56,7 @@ public class ColumnWriterFactory {
private static final Class<? extends BaseScalarWriter> requiredWriters[] = new Class[typeCount];
static {
- ColumnAccessors.defineRequiredWriters(requiredWriters);
+ ColumnAccessorUtils.defineRequiredWriters(requiredWriters);
}
public static AbstractObjectWriter buildColumnWriter(ColumnMetadata schema, ValueVector vector) {
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java
index 45d91606a..49bd51074 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java
@@ -55,7 +55,7 @@ public class ListVector extends BaseRepeatedValueVector {
public ListVector(MaterializedField field, BufferAllocator allocator, CallBack callBack) {
super(field, allocator);
- this.bits = new UInt1Vector(MaterializedField.create("$bits$", Types.required(MinorType.UINT1)), allocator);
+ this.bits = new UInt1Vector(MaterializedField.create(BITS_VECTOR_NAME, Types.required(MinorType.UINT1)), allocator);
offsets = getOffsetVector();
this.field.addChild(getDataVector().getField());
this.writer = new UnionListWriter(this);