-rw-r--r--  common/src/main/java/org/apache/drill/common/types/Types.java                                  |  53
-rw-r--r--  exec/java-exec/src/main/resources/bootstrap-storage-plugins.json                               |   5
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java                                |  36
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java                             | 557
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java                           |  10
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/TestBuilder.java                                  | 499
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/TestTestFramework.java                            | 397
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/HyperVectorValueIterator.java                |  98
-rw-r--r--  exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java  | 410
-rw-r--r--  exec/java-exec/src/test/resources/sort/testSelectWithLimitOffset.tsv                            |   5
-rw-r--r--  exec/java-exec/src/test/resources/store/json/json_simple_with_null.json                         |  16
-rw-r--r--  exec/java-exec/src/test/resources/testframework/decimal_test.json                               |   3
-rw-r--r--  exec/java-exec/src/test/resources/testframework/decimal_test.tsv                                |   1
-rw-r--r--  exec/java-exec/src/test/resources/testframework/schema_change_int_to_string_non-matching.json   |  30
-rw-r--r--  exec/java-exec/src/test/resources/testframework/small_test_data.json                            |   5
-rw-r--r--  exec/java-exec/src/test/resources/testframework/small_test_data.tsv                             |   5
-rw-r--r--  exec/java-exec/src/test/resources/testframework/small_test_data_extra.tsv                       |   7
-rw-r--r--  exec/java-exec/src/test/resources/testframework/small_test_data_extra_col.tsv                   |   5
-rw-r--r--  exec/java-exec/src/test/resources/testframework/small_test_data_reordered.tsv                   |   5
-rw-r--r--  exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_extra.tsv   |   7
-rw-r--r--  exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_fewer.tsv   |   3
-rw-r--r--  pom.xml                                                                                          |   1
22 files changed, 1764 insertions(+), 394 deletions(-)
diff --git a/common/src/main/java/org/apache/drill/common/types/Types.java b/common/src/main/java/org/apache/drill/common/types/Types.java
index 8ae3edda2..04fec54ac 100644
--- a/common/src/main/java/org/apache/drill/common/types/Types.java
+++ b/common/src/main/java/org/apache/drill/common/types/Types.java
@@ -18,7 +18,9 @@
package org.apache.drill.common.types;
import static org.apache.drill.common.types.TypeProtos.DataMode.REPEATED;
+import static org.apache.drill.common.types.TypeProtos.MinorType.*;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
@@ -370,6 +372,57 @@ public class Types {
}
}
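+ /**
+ * Returns a name for the given minor type that can be used in SQL text, such as a cast expression
+ * (for example, FLOAT8 maps to "double" and VARBINARY maps to "binary").
+ */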
+ public static String getNameOfMinorType(MinorType type) {
+ switch (type) {
+ case BIT:
+ return "bool";
+ case TINYINT:
+ return "tinyint";
+ case UINT1:
+ return "uint1";
+ case SMALLINT:
+ return "smallint";
+ case UINT2:
+ return "uint2";
+ case INT:
+ return "int";
+ case UINT4:
+ return "uint4";
+ case BIGINT:
+ return "bigint";
+ case UINT8:
+ return "uint8";
+ case FLOAT4:
+ return "float";
+ case FLOAT8:
+ return "double";
+ case DECIMAL9:
+ return "decimal";
+ case DECIMAL18:
+ return "decimal";
+ case DECIMAL28SPARSE:
+ return "decimal";
+ case DECIMAL38SPARSE:
+ return "decimal";
+ case VARCHAR:
+ return "varchar";
+ case VAR16CHAR:
+ return "utf16";
+ case DATE:
+ return "date";
+ case TIME:
+ return "time";
+ case TIMESTAMP:
+ return "timestamp";
+ case VARBINARY:
+ return "binary";
+ case LATE:
+ throw new DrillRuntimeException("The late type should never appear in execution or an SQL query, so it does not have a name to refer to it.");
+ default:
+ throw new DrillRuntimeException("Unrecognized type " + type);
+ }
+ }
+
public static String toString(MajorType type) {
return type != null ? "MajorType[" + TextFormat.shortDebugString(type) + "]" : "null";
}
diff --git a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json
index 4a20beacf..6bf1872ae 100644
--- a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json
+++ b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json
@@ -47,6 +47,11 @@
extensions: [ "csv" ],
delimiter: ","
},
+ "tsv" : {
+ type: "text",
+ extensions: [ "tsv" ],
+ delimiter: "\t"
+ },
"json" : {
type: "json"
},
diff --git a/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java b/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java
index 18fe84e03..0de39382b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java
@@ -18,13 +18,24 @@
package org.apache.drill;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.lang.reflect.Array;
import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
+import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
import org.apache.drill.common.config.DrillConfig;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
import org.apache.drill.common.util.TestTools;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ExecTest;
@@ -37,7 +48,11 @@ import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.TopLevelAllocator;
import org.apache.drill.exec.proto.UserBitShared.QueryId;
import org.apache.drill.exec.proto.UserBitShared.QueryType;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.HyperVectorWrapper;
+import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatchLoader;
+import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.rpc.RpcException;
import org.apache.drill.exec.rpc.user.ConnectionThrottle;
import org.apache.drill.exec.rpc.user.QueryResultBatch;
@@ -45,7 +60,10 @@ import org.apache.drill.exec.rpc.user.UserResultsListener;
import org.apache.drill.exec.server.Drillbit;
import org.apache.drill.exec.server.RemoteServiceSet;
import org.apache.drill.exec.util.VectorUtil;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.hadoop.io.Text;
import org.junit.AfterClass;
+import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.rules.TestRule;
import org.junit.rules.TestWatcher;
@@ -54,6 +72,9 @@ import org.junit.runner.Description;
import com.google.common.base.Charsets;
import com.google.common.io.Resources;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
public class BaseTestQuery extends ExecTest{
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BaseTestQuery.class);
@@ -110,12 +131,14 @@ public class BaseTestQuery extends ExecTest{
}
}
-
-
protected BufferAllocator getAllocator() {
return allocator;
}
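+ /**
+ * Creates a {@link TestBuilder} backed by this test's allocator for describing a query and its expected
+ * results; see DrillTestWrapper for how the built test is run and verified.
+ */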
+ public TestBuilder testBuilder() {
+ return new TestBuilder(allocator);
+ }
+
@AfterClass
public static void closeClient() throws IOException{
if (client != null) {
@@ -150,12 +173,12 @@ public class BaseTestQuery extends ExecTest{
return testRunAndReturn(QueryType.PHYSICAL, physical);
}
- protected List<QueryResultBatch> testRunAndReturn(QueryType type, String query) throws Exception{
+ public static List<QueryResultBatch> testRunAndReturn(QueryType type, String query) throws Exception{
query = query.replace("[WORKING_PATH]", TestTools.getWorkingPath());
return client.runQuery(type, query);
}
- protected int testRunAndPrint(QueryType type, String query) throws Exception{
+ public static int testRunAndPrint(QueryType type, String query) throws Exception{
query = query.replace("[WORKING_PATH]", TestTools.getWorkingPath());
PrintingResultsListener resultListener = new PrintingResultsListener(client.getConfig(), Format.TSV, VectorUtil.DEFAULT_COLUMN_WIDTH);
client.runQuery(type, query, resultListener);
@@ -182,7 +205,7 @@ public class BaseTestQuery extends ExecTest{
}
}
- protected void test(String query) throws Exception{
+ public static void test(String query) throws Exception{
String[] queries = query.split(";");
for (String q : queries) {
if (q.trim().isEmpty()) {
@@ -220,7 +243,7 @@ public class BaseTestQuery extends ExecTest{
test(getFile(file));
}
- protected String getFile(String resource) throws IOException{
+ public static String getFile(String resource) throws IOException{
URL url = Resources.getResource(resource);
if (url == null) {
throw new IOException(String.format("Unable to find path %s.", resource));
@@ -309,5 +332,4 @@ public class BaseTestQuery extends ExecTest{
return formattedResults.toString();
}
-
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
new file mode 100644
index 000000000..f06203e5c
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
@@ -0,0 +1,557 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.drill;
+
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.HyperVectorValueIterator;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.proto.UserBitShared.QueryType;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.HyperVectorWrapper;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatchLoader;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.rpc.user.QueryResultBatch;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.hadoop.io.Text;
+
+import java.io.UnsupportedEncodingException;
+import java.lang.reflect.Array;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * An object to encapsulate the options for a Drill unit test, as well as the execution methods to perform the tests and
+ * validation of results.
+ *
+ * To construct an instance easily, look at the TestBuilder class. From an implementation of
+ * the BaseTestQuery class, an instance of the builder is accessible through the testBuilder() method.
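+ *
+ * A minimal usage sketch (the query, column name and expected value below are only illustrative):
+ * <pre>
+ *   testBuilder()
+ *     .sqlQuery("select employee_id from cp.`employee.json` limit 1")
+ *     .ordered()
+ *     .baselineColumns("employee_id")
+ *     .baselineValues(1l)
+ *     .build().run();
+ * </pre>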
+ */
+public class DrillTestWrapper {
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DrillTestWrapper.class);
+
+ // TODO - when in JSON, read baseline in all text mode to avoid precision loss for decimal values
+
+ // This flag will enable logging of all of the values that are validated. For large validations this is time-consuming,
+ // so this is not exposed in a way that it can be enabled for an individual test. It can be changed here while debugging
+ // a test to see all of the output, but as this framework is doing full validation, there is no reason to keep it on, as
+ // it will only make the test slower.
+ private static boolean VERBOSE_DEBUG = false;
+
+ // The motivation behind the TestBuilder was to provide a clean API for test writers. The model is mostly designed to
+ // prepare all of the components necessary for running the tests, before the TestWrapper is initialized. There is however
+ // one case where the setup for the baseline is driven by the test query results, and this is implicit type enforcement
+ // for the baseline data. In this case there needs to be a call back into the TestBuilder once we know the type information
+ // from the test query.
+ private TestBuilder testBuilder;
+ // test query to run
+ private String query;
+ // The type of query provided
+ private UserBitShared.QueryType queryType;
+ // The type of query provided for the baseline
+ private UserBitShared.QueryType baselineQueryType;
+ // should ordering be enforced in the baseline check
+ private boolean ordered;
+ // TODO - implement this
+ private boolean approximateEquality;
+ private BufferAllocator allocator;
+ // queries to run before the baseline or test queries, can be used to set options
+ private String baselineOptionSettingQueries;
+ private String testOptionSettingQueries;
+ // Two different methods are available for comparing ordered results. The default reads all of the records
+ // into giant lists of objects, like one giant on-heap batch of 'vectors'.
+ // This flag enables the other approach, which iterates through a hyper batch for the test query results and baseline.
+ // While this does work faster and use less memory, it can be harder to debug as all of the elements are not in a
+ // single list.
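+ // (A test opts into the hyper batch comparison by calling highPerformanceComparison() on the builder,
+ // which is only permitted together with ordered().)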
+ private boolean highPerformanceComparison;
+ // if the baseline is given as explicit values rather than a file or query, test writers can provide the baseline
+ // values and columns without creating a file; these are provided to the builder in the baselineValues() and
+ // baselineColumns() methods and translated into a map in the builder
+ private List<Map> baselineRecords;
+
+ public DrillTestWrapper(TestBuilder testBuilder, BufferAllocator allocator, String query, QueryType queryType,
+ String baselineOptionSettingQueries, String testOptionSettingQueries,
+ QueryType baselineQueryType, boolean ordered, boolean approximateEquality,
+ boolean highPerformanceComparison, List<Map> baselineRecords) {
+ this.testBuilder = testBuilder;
+ this.allocator = allocator;
+ this.query = query;
+ this.queryType = queryType;
+ this.baselineQueryType = baselineQueryType;
+ this.ordered = ordered;
+ this.approximateEquality = approximateEquality;
+ this.baselineOptionSettingQueries = baselineOptionSettingQueries;
+ this.testOptionSettingQueries = testOptionSettingQueries;
+ this.highPerformanceComparison = highPerformanceComparison;
+ this.baselineRecords = baselineRecords;
+ }
+
+ public void run() throws Exception {
+ if (ordered) {
+ compareOrderedResults();
+ } else {
+ compareUnorderedResults();
+ }
+ }
+
+ private BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ private void compareHyperVectors(Map<String, HyperVectorValueIterator> expectedRecords,
+ Map<String, HyperVectorValueIterator> actualRecords) throws Exception {
+ for (String s : expectedRecords.keySet()) {
+ assertNotNull("Expected column '" + s + "' not found.", actualRecords.get(s));
+ assertEquals(expectedRecords.get(s).getTotalRecords(), actualRecords.get(s).getTotalRecords());
+ HyperVectorValueIterator expectedValues = expectedRecords.get(s);
+ HyperVectorValueIterator actualValues = actualRecords.get(s);
+ int i = 0;
+ while (expectedValues.hasNext()) {
+ compareValuesErrorOnMismatch(expectedValues.next(), actualValues.next(), i, s);
+ i++;
+ }
+ }
+ for (HyperVectorValueIterator hvi : expectedRecords.values()) {
+ for (ValueVector vv : hvi.getHyperVector().getValueVectors()) {
+ vv.clear();
+ }
+ }
+ for (HyperVectorValueIterator hvi : actualRecords.values()) {
+ for (ValueVector vv : hvi.getHyperVector().getValueVectors()) {
+ vv.clear();
+ }
+ }
+ }
+
+ private void compareMergedVectors(Map<String, List> expectedRecords, Map<String, List> actualRecords) throws Exception {
+
+ for (String s : actualRecords.keySet()) {
+ assertNotNull("Unexpected extra column " + s + " returned by query.", expectedRecords.get(s));
+ assertEquals("Incorrect number of rows returned by query.", expectedRecords.get(s).size(), actualRecords.get(s).size());
+ List expectedValues = expectedRecords.get(s);
+ List actualValues = actualRecords.get(s);
+ assertEquals("Different number of records returned", expectedValues.size(), actualValues.size());
+
+ for (int i = 0; i < expectedValues.size(); i++) {
+ compareValuesErrorOnMismatch(expectedValues.get(i), actualValues.get(i), i, s);
+ }
+ }
+ if (actualRecords.size() < expectedRecords.size()) {
+ throw new Exception(findMissingColumns(expectedRecords.keySet(), actualRecords.keySet()));
+ }
+ }
+
+ private Map<String, HyperVectorValueIterator> addToHyperVectorMap(List<QueryResultBatch> records, RecordBatchLoader loader,
+ BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException {
+ // TODO - this does not handle schema changes
+ Map<String, HyperVectorValueIterator> combinedVectors = new HashMap();
+
+ long totalRecords = 0;
+ QueryResultBatch batch;
+ int size = records.size();
+ for (int i = 0; i < size; i++) {
+ batch = records.get(i);
+ loader = new RecordBatchLoader(getAllocator());
+ loader.load(batch.getHeader().getDef(), batch.getData());
+ logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
+ totalRecords += loader.getRecordCount();
+ for (VectorWrapper w : loader) {
+ String field = w.getField().toExpr();
+ if (!combinedVectors.containsKey(field)) {
+ MaterializedField mf = w.getField();
+ ValueVector[] vvList = (ValueVector[]) Array.newInstance(mf.getValueClass(), 1);
+ vvList[0] = w.getValueVector();
+ combinedVectors.put(mf.getPath().toExpr(), new HyperVectorValueIterator(mf, new HyperVectorWrapper(mf,
+ vvList)));
+ } else {
+ combinedVectors.get(field).getHyperVector().addVector(w.getValueVector());
+ }
+
+ }
+ }
+ for (HyperVectorValueIterator hvi : combinedVectors.values()) {
+ hvi.determineTotalSize();
+ }
+ return combinedVectors;
+ }
+
+ /**
+ * Only use this method if absolutely needed. There are utility methods to compare results of single queries.
+ * The current use case for exposing this is setting session or system options between the test and verification
+ * queries.
+ *
+ * TODO - evaluate adding an interface to allow setting session and system options before running queries
+ * @param records
+ * @param loader
+ * @param schema
+ * @return
+ * @throws SchemaChangeException
+ * @throws UnsupportedEncodingException
+ */
+ private Map<String, List> addToCombinedVectorResults(List<QueryResultBatch> records, RecordBatchLoader loader,
+ BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException {
+ // TODO - this does not handle schema changes
+ Map<String, List> combinedVectors = new HashMap();
+
+ long totalRecords = 0;
+ QueryResultBatch batch;
+ int size = records.size();
+ for (int i = 0; i < size; i++) {
+ batch = records.get(0);
+ loader.load(batch.getHeader().getDef(), batch.getData());
+ if (schema == null) {
+ schema = loader.getSchema();
+ for (MaterializedField mf : schema) {
+ combinedVectors.put(mf.getPath().toExpr(), new ArrayList());
+ }
+ }
+ logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
+ totalRecords += loader.getRecordCount();
+ for (VectorWrapper w : loader) {
+ String field = w.getField().toExpr();
+ for (int j = 0; j < loader.getRecordCount(); j++) {
+ Object obj = w.getValueVector().getAccessor().getObject(j);
+ if (obj != null) {
+ if (obj instanceof Text) {
+ obj = obj.toString();
+ if (obj.equals("")) {
+ System.out.println(w.getField());
+ }
+ }
+ else if (obj instanceof byte[]) {
+ obj = new String((byte[]) obj, "UTF-8");
+ }
+ }
+ combinedVectors.get(field).add(obj);
+ }
+ }
+ records.remove(0);
+ batch.release();
+ loader.clear();
+ }
+ return combinedVectors;
+ }
+
+ /**
+ * Use this method only if necessary to validate one query against another. If you are just validating against a
+ * baseline file use one of the simpler interfaces that will write the validation query for you.
+ *
+ * @throws Exception
+ */
+ protected void compareUnorderedResults() throws Exception {
+ RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
+ BatchSchema schema = null;
+
+ BaseTestQuery.test(testOptionSettingQueries);
+ List<QueryResultBatch> expected = BaseTestQuery.testRunAndReturn(queryType, query);
+
+ addTypeInfoIfMissing(expected.get(0), testBuilder);
+
+ List<Map> expectedRecords = new ArrayList<>();
+ addToMaterializedResults(expectedRecords, expected, loader, schema);
+
+ List<QueryResultBatch> results = new ArrayList();
+ List<Map> actualRecords = new ArrayList<>();
+ // If baseline data was not provided to the test builder directly, we must run a query for the baseline; this includes
+ // the cases where the baseline is stored in a file.
+ if (baselineRecords == null) {
+ BaseTestQuery.test(baselineOptionSettingQueries);
+ results = BaseTestQuery.testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
+ addToMaterializedResults(actualRecords, results, loader, schema);
+ } else {
+ actualRecords = baselineRecords;
+ }
+
+ compareResults(expectedRecords, actualRecords);
+ cleanupBatches(expected, results);
+ }
+
+ /**
+ * Use this method only if necessary to validate one query against another. If you are just validating against a
+ * baseline file use one of the simpler interfaces that will write the validation query for you.
+ *
+ * @throws Exception
+ */
+ protected void compareOrderedResults() throws Exception {
+ if (highPerformanceComparison) {
+ if (baselineQueryType != null) {
+ throw new Exception("Cannot do a high performance comparison without using a baseline file");
+ }
+ compareResultsHyperVector();
+ } else {
+ compareMergedOnHeapVectors();
+ }
+ }
+
+ public void compareMergedOnHeapVectors() throws Exception {
+ RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
+ BatchSchema schema = null;
+
+ BaseTestQuery.test(testOptionSettingQueries);
+ List<QueryResultBatch> results = BaseTestQuery.testRunAndReturn(queryType, query);
+ // To avoid extra work for test writers, types can optionally be inferred from the test query
+ addTypeInfoIfMissing(results.get(0), testBuilder);
+
+ Map<String, List> actualSuperVectors = addToCombinedVectorResults(results, loader, schema);
+
+ List<QueryResultBatch> expected = null;
+ Map<String, List> expectedSuperVectors = null;
+
+ // If baseline data was not provided to the test builder directly, we must run a query for the baseline; this includes
+ // the cases where the baseline is stored in a file.
+ if (baselineRecords == null) {
+ BaseTestQuery.test(baselineOptionSettingQueries);
+ expected = BaseTestQuery.testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
+ expectedSuperVectors = addToCombinedVectorResults(expected, loader, schema);
+ } else {
+ // data is built in the TestBuilder in a row-major format as it is provided by the user;
+ // translate it here to the vectorized representation expected by the ordered comparison
+ expectedSuperVectors = new HashMap();
+ expected = new ArrayList();
+ for (String s : ((Map<String, Object>)baselineRecords.get(0)).keySet()) {
+ expectedSuperVectors.put(s, new ArrayList());
+ }
+ for (Map<String, Object> m : baselineRecords) {
+ for (String s : m.keySet()) {
+ expectedSuperVectors.get(s).add(m.get(s));
+ }
+ }
+ }
+
+ compareMergedVectors(expectedSuperVectors, actualSuperVectors);
+
+ cleanupBatches(expected, results);
+ }
+
+ public void compareResultsHyperVector() throws Exception {
+ RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
+ BatchSchema schema = null;
+
+ BaseTestQuery.test(testOptionSettingQueries);
+ List<QueryResultBatch> results = BaseTestQuery.testRunAndReturn(queryType, query);
+ // To avoid extra work for test writers, types can optionally be inferred from the test query
+ addTypeInfoIfMissing(results.get(0), testBuilder);
+
+ Map<String, HyperVectorValueIterator> actualSuperVectors = addToHyperVectorMap(results, loader, schema);
+
+ BaseTestQuery.test(baselineOptionSettingQueries);
+ List<QueryResultBatch> expected = BaseTestQuery.testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
+
+ Map<String, HyperVectorValueIterator> expectedSuperVectors = addToHyperVectorMap(expected, loader, schema);
+
+ compareHyperVectors(expectedSuperVectors, actualSuperVectors);
+ cleanupBatches(results, expected);
+ }
+
+ private void addTypeInfoIfMissing(QueryResultBatch batch, TestBuilder testBuilder) {
+ if (! testBuilder.typeInfoSet()) {
+ Map<SchemaPath, TypeProtos.MajorType> typeMap = getTypeMapFromBatch(batch);
+ testBuilder.baselineTypes(typeMap);
+ }
+
+ }
+
+ private Map<SchemaPath, TypeProtos.MajorType> getTypeMapFromBatch(QueryResultBatch batch) {
+ Map<SchemaPath, TypeProtos.MajorType> typeMap = new HashMap();
+ for (int i = 0; i < batch.getHeader().getDef().getFieldCount(); i++) {
+ typeMap.put(MaterializedField.create(batch.getHeader().getDef().getField(i)).getPath(),
+ batch.getHeader().getDef().getField(i).getMajorType());
+ }
+ return typeMap;
+ }
+
+ private void cleanupBatches(List<QueryResultBatch>... results) {
+ for (List<QueryResultBatch> resultList : results ) {
+ for (QueryResultBatch result : resultList) {
+ result.release();
+ }
+ }
+ }
+
+ protected void addToMaterializedResults(List<Map> materializedRecords, List<QueryResultBatch> records, RecordBatchLoader loader,
+ BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException {
+ long totalRecords = 0;
+ QueryResultBatch batch;
+ int size = records.size();
+ for (int i = 0; i < size; i++) {
+ batch = records.get(0);
+ loader.load(batch.getHeader().getDef(), batch.getData());
+ if (schema == null) {
+ schema = loader.getSchema();
+ }
+ logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
+ totalRecords += loader.getRecordCount();
+ for (int j = 0; j < loader.getRecordCount(); j++) {
+ HashMap<String, Object> record = new HashMap<>();
+ for (VectorWrapper w : loader) {
+ Object obj = w.getValueVector().getAccessor().getObject(j);
+ if (obj != null) {
+ if (obj instanceof Text) {
+ obj = obj.toString();
+ if (obj.equals("")) {
+ System.out.println(w.getField());
+ }
+ }
+ else if (obj instanceof byte[]) {
+ obj = new String((byte[]) obj, "UTF-8");
+ }
+ }
+ record.put(w.getField().toExpr(), obj);
+ }
+ materializedRecords.add(record);
+ }
+ records.remove(0);
+ batch.release();
+ loader.clear();
+ }
+ }
+
+ public boolean compareValuesErrorOnMismatch(Object expected, Object actual, int counter, String column) throws Exception {
+
+ if (compareValues(expected, actual, counter, column)) {
+ return true;
+ }
+ if (expected == null) {
+ throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: null " +
+ "but received " + actual + "(" + actual.getClass().getSimpleName() + ")");
+ }
+ if (actual == null) {
+ throw new Exception("unexpected null at position " + counter + " column '" + column + "' should have been: " + expected);
+ }
+ if (actual instanceof byte[]) {
+ throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: "
+ + new String((byte[])expected, "UTF-8") + " but received " + new String((byte[])actual, "UTF-8"));
+ }
+ if (!expected.equals(actual)) {
+ throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: "
+ + expected + "(" + expected.getClass().getSimpleName() + ") but received " + actual + "(" + actual.getClass().getSimpleName() + ")");
+ }
+ return true;
+ }
+
+ public boolean compareValues(Object expected, Object actual, int counter, String column) throws Exception {
+ if (expected == null) {
+ if (actual == null) {
+ if (VERBOSE_DEBUG) {
+ logger.debug("(1) at position " + counter + " column '" + column + "' matched value: " + expected );
+ }
+ return true;
+ } else {
+ return false;
+ }
+ }
+ if (actual == null) {
+ return false;
+ }
+ if (actual instanceof byte[]) {
+ if ( ! Arrays.equals((byte[]) expected, (byte[]) actual)) {
+ return false;
+ } else {
+ if (VERBOSE_DEBUG) {
+ logger.debug("at position " + counter + " column '" + column + "' matched value " + new String((byte[])expected, "UTF-8"));
+ }
+ return true;
+ }
+ }
+ if (!expected.equals(actual)) {
+ return false;
+ } else {
+ if (VERBOSE_DEBUG) {
+ logger.debug("at position " + counter + " column '" + column + "' matched value: " + expected );
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Compare two result sets, ignoring ordering.
+ *
+ * @param expectedRecords - list of records from baseline
+ * @param actualRecords - list of records from test query, WARNING - this list is destroyed in this method
+ * @throws Exception
+ */
+ private void compareResults(List<Map> expectedRecords, List<Map> actualRecords) throws Exception {
+
+ assertEquals("Different number of records returned", expectedRecords.size(), actualRecords.size());
+
+ String missing = "";
+ int i = 0;
+ int counter = 0;
+ boolean found;
+ for (Map<String, Object> expectedRecord : expectedRecords) {
+ i = 0;
+ found = false;
+ findMatch:
+ for (Map<String, Object> actualRecord : actualRecords) {
+ for (String s : actualRecord.keySet()) {
+ if (!expectedRecord.containsKey(s)) {
+ throw new Exception("Unexpected column '" + s + "' returned by query.");
+ }
+ if ( ! compareValues(expectedRecord.get(s), actualRecord.get(s), counter, s)) {
+ i++;
+ continue findMatch;
+ }
+ }
+ if (actualRecord.size() < expectedRecord.size()) {
+ throw new Exception(findMissingColumns(expectedRecord.keySet(), actualRecord.keySet()));
+ }
+ found = true;
+ break;
+ }
+ if (!found) {
+ throw new Exception("Did not find expected record in result set: " + printRecord(expectedRecord));
+ } else {
+ actualRecords.remove(i);
+ counter++;
+ }
+ }
+ logger.debug(missing);
+ System.out.println(missing);
+ assertEquals(0, actualRecords.size());
+ }
+
+ private String findMissingColumns(Set<String> expected, Set<String> actual) {
+ String missingCols = "";
+ for (String colName : expected) {
+ if (!actual.contains(colName)) {
+ missingCols += colName + ", ";
+ }
+ }
+ return "Expected column(s) " + missingCols + " not found in result set.";
+ }
+
+ private String printRecord(Map<String, Object> record) {
+ String ret = "";
+ for (String s : record.keySet()) {
+ ret += s + " : " + record.get(s) + ", ";
+ }
+ return ret + "\n";
+ }
+
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java
index 57689080e..a554db2a2 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java
@@ -41,9 +41,17 @@ public class TestAltSortQueries extends BaseTestQuery{
test("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id limit 5 ");
}
+ // TODO - This is currently passing, but I think that it is still in error;
+ // the new verification for this test was written against output that was previously not being checked.
+ // It looks like there is an off-by-one error in the results; see the baseline file for the current results.
@Test
public void testSelectWithLimitOffset() throws Exception{
- test("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id limit 5 offset 10 ");
+ testBuilder()
+ .sqlQuery("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id limit 5 offset 10 ")
+ .ordered()
+ .csvBaselineFile("sort/testSelectWithLimitOffset.tsv")
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
}
@Test
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/TestBuilder.java
new file mode 100644
index 000000000..9334c6e84
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestBuilder.java
@@ -0,0 +1,499 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.drill;
+
+import com.google.common.base.Joiner;
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.CommonTokenStream;
+import org.antlr.runtime.RecognitionException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.parser.ExprLexer;
+import org.apache.drill.common.expression.parser.ExprParser;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.proto.UserBitShared;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
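+/**
+ * Fluent builder used by tests (via BaseTestQuery.testBuilder()) to describe a test query, the expected
+ * baseline (a CSV/JSON file, another query, or explicit values) and how the two result sets should be compared;
+ * build() produces the DrillTestWrapper that actually runs the comparison.
+ */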
+public class TestBuilder {
+
+ // test query to run
+ private String query;
+ // the type of query for the test
+ private UserBitShared.QueryType queryType;
+ // should the validation enforce ordering
+ private Boolean ordered;
+ private boolean approximateEquality;
+ private BufferAllocator allocator;
+ // Used to pass the type information associated with particular column names rather than relying on the
+ // ordering of the columns in the CSV file, or the default type inferences when reading JSON. This is used for the
+ // case where the types from the test query results drive the casts added to the baseline query, which saves a little
+ // bit of setup in cases where strict type enforcement is not necessary for a given test
+ protected Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap;
+ // queries to run before the baseline or test queries, can be used to set options
+ private String baselineOptionSettingQueries;
+ private String testOptionSettingQueries;
+ // Two different methods are available for comparing ordered results. The default reads all of the records
+ // into giant lists of objects, like one giant on-heap batch of 'vectors'.
+ // This flag enables the other approach, which iterates through a hyper batch for the test query results and baseline.
+ // While this does work faster and use less memory, it can be harder to debug as all of the elements are not in a
+ // single list.
+ private boolean highPerformanceComparison;
+ // for cases where the result set is just a single record, test writers can avoid creating a lot of small baseline
+ // files by providing a list of baseline values
+ private Object[] baselineValues;
+ // column names for use with the baseline values
+ protected String[] baselineColumns;
+ // In cases where we need to verify larger datasets without the risk of running the baseline data through
+ // the drill engine, results can be provided in a list of maps. While this model does make a lot of sense, there is a
+ // lot of work to make the type handling/casting work correctly, and making robust complex type handling work completely outside
+ // of the drill engine for generating baselines would likely be more work than it would be worth. For now we will be
+ // going with an approach of using this facility to validate the parts of the drill engine that could break in ways
+ // that would affect the reading of baseline files (i.e. we need robust test for storage engines, project and casting that
+ // use this interface) and then rely on the engine for the rest of the tests that will use the baseline queries.
+ private List<Map> baselineRecords;
+
+ public TestBuilder(BufferAllocator allocator) {
+ this.allocator = allocator;
+ reset();
+ }
+
+ public TestBuilder(BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered,
+ boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
+ String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
+ this(allocator);
+ if (ordered == null) {
+ throw new RuntimeException("Ordering not set, when using a baseline file or query you must explicitly call the ordered() or unOrdered() method on the " + this.getClass().getSimpleName());
+ }
+ this.query = query;
+ this.queryType = queryType;
+ this.ordered = ordered;
+ this.approximateEquality = approximateEquality;
+ this.baselineTypeMap = baselineTypeMap;
+ this.baselineOptionSettingQueries = baselineOptionSettingQueries;
+ this.testOptionSettingQueries = testOptionSettingQueries;
+ this.highPerformanceComparison = highPerformanceComparison;
+ }
+
+ protected TestBuilder reset() {
+ query = "";
+ ordered = null;
+ approximateEquality = false;
+ highPerformanceComparison = false;
+ testOptionSettingQueries = "";
+ baselineOptionSettingQueries = "";
+ baselineRecords = null;
+ return this;
+ }
+
+ public DrillTestWrapper build() throws Exception {
+ if ( ! ordered && highPerformanceComparison ) {
+ throw new Exception("High performance comparison only available for ordered checks, to enforce this restriction, ordered() must be called first.");
+ }
+ return new DrillTestWrapper(this, allocator, query, queryType, baselineOptionSettingQueries, testOptionSettingQueries,
+ getValidationQueryType(), ordered, approximateEquality, highPerformanceComparison, baselineRecords);
+ }
+
+ public TestBuilder sqlQuery(String query) {
+ this.query = query;
+ this.queryType = UserBitShared.QueryType.SQL;
+ return this;
+ }
+
+ public TestBuilder sqlQueryFromFile(String queryFile) throws IOException {
+ String query = BaseTestQuery.getFile(queryFile);
+ this.query = query;
+ this.queryType = UserBitShared.QueryType.SQL;
+ return this;
+ }
+
+ public TestBuilder physicalPlanFromFile(String queryFile) throws IOException {
+ String query = BaseTestQuery.getFile(queryFile);
+ this.query = query;
+ this.queryType = UserBitShared.QueryType.PHYSICAL;
+ return this;
+ }
+
+ public TestBuilder ordered() {
+ this.ordered = true;
+ return this;
+ }
+
+ public TestBuilder unOrdered() {
+ this.ordered = false;
+ return this;
+ }
+
+ // this can only be used with ordered verifications; it does run faster and use less memory, but may be
+ // a little harder to debug as it iterates over a hyper batch rather than reading all of the values into
+ // large on-heap lists
+ public TestBuilder highPerformanceComparison() throws Exception {
+ this.highPerformanceComparison = true;
+ return this;
+ }
+
+ // list of queries to run before the baseline query, can be used to set several options
+ // list takes the form of a semi-colon separated list
+ public TestBuilder optionSettingQueriesForBaseline(String queries) {
+ this.baselineOptionSettingQueries = queries;
+ return this;
+ }
+
+ // list of queries to run before the test query, can be used to set several options
+ // list takes the form of a semi-colon separated list
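+ // for example (illustrative; any valid session/system option queries work here):
+ //   optionSettingQueriesForTestQuery("alter session set `store.json.all_text_mode` = true")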
+ public TestBuilder optionSettingQueriesForTestQuery(String queries) {
+ this.testOptionSettingQueries = queries;
+ return this;
+ }
+ public TestBuilder approximateEquality() {
+ this.approximateEquality = true;
+ return this;
+ }
+
+ // modified code from SchemaPath.De class. This should be used sparingly and only in tests if absolutely needed.
+ public static SchemaPath parsePath(String path) {
+ try {
+ // logger.debug("Parsing expression string '{}'", expr);
+ ExprLexer lexer = new ExprLexer(new ANTLRStringStream(path));
+ CommonTokenStream tokens = new CommonTokenStream(lexer);
+ ExprParser parser = new ExprParser(tokens);
+
+ //TODO: move functionregistry and error collector to injectables.
+ //ctxt.findInjectableValue(valueId, forProperty, beanInstance)
+ ExprParser.parse_return ret = parser.parse();
+
+ // ret.e.resolveAndValidate(expr, errorCollector);
+ if (ret.e instanceof SchemaPath) {
+ return (SchemaPath) ret.e;
+ } else {
+ throw new IllegalStateException("Schema path is not a valid format.");
+ }
+ } catch (RecognitionException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ String getValidationQuery() throws Exception {
+ throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query");
+ }
+
+ protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ if (singleExplicitBaselineRecord()) {
+ return null;
+ }
+ throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query");
+ }
+
+ public JSONTestBuilder jsonBaselineFile(String filePath) {
+ return new JSONTestBuilder(filePath, allocator, query, queryType, ordered, approximateEquality,
+ baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison);
+ }
+
+ public CSVTestBuilder csvBaselineFile(String filePath) {
+ return new CSVTestBuilder(filePath, allocator, query, queryType, ordered, approximateEquality,
+ baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison);
+ }
+
+ public TestBuilder baselineTypes(Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap) {
+ this.baselineTypeMap = baselineTypeMap;
+ return this;
+ }
+
+ boolean typeInfoSet() {
+ if (baselineTypeMap != null) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ // indicate that the test query should be checked for an empty result set
+ public TestBuilder expectsEmptyResultSet() {
+ unOrdered();
+ baselineRecords = new ArrayList();
+ return this;
+ }
+
+ /**
+ * This method is used to pass in a simple list of values for a single record verification without
+ * the need to create a CSV or JSON file to store the baseline.
+ *
+ * This can be called repeatedly to pass a list of records to verify. It works for both ordered and unordered
+ * checks.
+ *
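+ * For example (the columns and values here are only illustrative):
+ * <pre>
+ *   .baselineColumns("a", "b")
+ *   .baselineValues(5l, 10l)
+ *   .baselineValues(7l, null)
+ * </pre>
+ *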
+ * @param baselineValues - the baseline values to validate
+ * @return
+ */
+ public TestBuilder baselineValues(Object ... baselineValues) {
+ if (ordered == null) {
+ throw new RuntimeException("Ordering not set, before specifying baseline data you must explicitly call the ordered() or unOrdered() method on the " + this.getClass().getSimpleName());
+ }
+ if (baselineRecords == null) {
+ baselineRecords = new ArrayList();
+ }
+ Map<String, Object> ret = new HashMap();
+ int i = 0;
+ assertEquals("Must supply the same number of baseline values as columns.", baselineValues.length, baselineColumns.length);
+ for (String s : baselineColumns) {
+ ret.put(s, baselineValues[i]);
+ i++;
+ }
+ this.baselineRecords.add(ret);
+ return this;
+ }
+
+ /**
+ * This can be used in cases where we want to avoid issues with the assumptions made by the test framework.
+ * Most of the methods for verification in the framework run Drill queries to read the baseline files or
+ * execute alternative baseline queries. This model relies on basic functionality of reading files with storage
+ * plugins and applying casts/projects to be stable.
+ *
+ * This method can be used to verify the engine for these cases and any other future execution paths that would
+ * be used by both the test query and baseline. Without tests like this it is possible that some tests
+ * could falsely report as passing, as both the test query and baseline query could run into the same problem
+ * with an assumed stable code path and produce the same erroneous result.
+ *
+ * @param materializedRecords - a list of maps representing materialized results
+ * @return
+ */
+ public TestBuilder baselineRecords(List<Map> materializedRecords) {
+ this.baselineRecords = materializedRecords;
+ return this;
+ }
+
+ /**
+ * This setting has a slightly different impact on the test depending on how some of the other
+ * configuration options are set.
+ *
+ * If a JSON baseline file is given, this list will act as a project list to verify the
+ * test query against a subset of the columns in the file.
+ *
+ * For a CSV baseline file, these will act as aliases for columns [0 .. n] in the repeated
+ * varchar column that is read out of CSV.
+ *
+ * For a baseline sql query, this currently has no effect.
+ *
+ * For explicit baseline values given in java code with the baselineValues() method, these will
+ * be used to create a map for the one record verification.
+ */
+ public TestBuilder baselineColumns(String... columns) {
+ for (int i = 0; i < columns.length; i++) {
+ columns[i] = parsePath(columns[i]).toExpr();
+ }
+ this.baselineColumns = columns;
+ return this;
+ }
+
+ private boolean singleExplicitBaselineRecord() {
+ return baselineRecords != null;
+ }
+
+ // provide a SQL query to validate against
+ public BaselineQueryTestBuilder sqlBaselineQuery(String baselineQuery) {
+ return new BaselineQueryTestBuilder(baselineQuery, UserBitShared.QueryType.SQL, allocator, query, queryType, ordered, approximateEquality,
+ baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison);
+ }
+
+ // provide a path to a file containing a SQL query to use as a baseline
+ public BaselineQueryTestBuilder sqlBaselineQueryFromFile(String baselineQueryFilename) throws IOException {
+ String baselineQuery = BaseTestQuery.getFile(baselineQueryFilename);
+ return new BaselineQueryTestBuilder(baselineQuery, UserBitShared.QueryType.SQL, allocator, query, queryType, ordered, approximateEquality,
+ baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison);
+ }
+
+ // as physical plans are verbose, this is the only option provided for specifying them; we should enforce
+ // that physical plans, or any large JSON strings, do not live in the Java source as literals
+ public BaselineQueryTestBuilder physicalPlanBaselineQueryFromFile(String baselinePhysicalPlanPath) throws IOException {
+ String baselineQuery = BaseTestQuery.getFile(baselinePhysicalPlanPath);
+ return new BaselineQueryTestBuilder(baselineQuery, UserBitShared.QueryType.PHYSICAL, allocator, query, queryType, ordered, approximateEquality,
+ baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison);
+ }
+
+ private String getDecimalPrecisionScaleInfo(TypeProtos.MajorType type) {
+ String precision = "";
+ switch(type.getMinorType()) {
+ case DECIMAL18:
+ case DECIMAL28SPARSE:
+ case DECIMAL38SPARSE:
+ case DECIMAL38DENSE:
+ case DECIMAL28DENSE:
+ case DECIMAL9:
+ precision = String.format("(%d,%d)", type.getScale(), type.getPrecision());
+ break;
+ default:
+ ; // do nothing, empty string already set above
+ }
+ return precision;
+ }
+
+ public class CSVTestBuilder extends TestBuilder {
+
+ // path to the baseline file that will be inserted into the validation query
+ private String baselineFilePath;
+ // used to cast the baseline file columns; if not set, the types
+ // that come out of the test query drive the interpretation of the baseline
+ private TypeProtos.MajorType[] baselineTypes;
+
+ CSVTestBuilder(String baselineFile, BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered,
+ boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
+ String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
+ super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
+ highPerformanceComparison);
+ this.baselineFilePath = baselineFile;
+ }
+
+ public CSVTestBuilder baselineTypes(TypeProtos.MajorType... baselineTypes) {
+ this.baselineTypes = baselineTypes;
+ this.baselineTypeMap = null;
+ return this;
+ }
+
+ // convenience method to convert minor types to major types if no decimals with precisions are needed
+ public CSVTestBuilder baselineTypes(TypeProtos.MinorType ... baselineTypes) {
+ TypeProtos.MajorType[] majorTypes = new TypeProtos.MajorType[baselineTypes.length];
+ int i = 0;
+ for(TypeProtos.MinorType minorType : baselineTypes) {
+ majorTypes[i] = Types.required(minorType);
+ i++;
+ }
+ this.baselineTypes = majorTypes;
+ this.baselineTypeMap = null;
+ return this;
+ }
+
+ protected TestBuilder reset() {
+ super.reset();
+ baselineTypeMap = null;
+ baselineTypes = null;
+ baselineFilePath = null;
+ return this;
+ }
+
+ boolean typeInfoSet() {
+ if (super.typeInfoSet() || baselineTypes != null) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ String getValidationQuery() throws Exception {
+ if (baselineColumns.length == 0) {
+ throw new Exception("Baseline CSV files require passing column names, please call the baselineColumns() method on the test builder.");
+ }
+
+ if (baselineTypes != null) {
+ assertEquals("Must pass the same number of types as column names if types are provided.", baselineTypes.length, baselineColumns.length);
+ }
+
+ String[] aliasedExpectedColumns = new String[baselineColumns.length];
+ for (int i = 0; i < baselineColumns.length; i++) {
+ aliasedExpectedColumns[i] = "columns[" + i + "] ";
+ TypeProtos.MajorType majorType;
+ if (baselineTypes != null) {
+ majorType = baselineTypes[i];
+ } else if (baselineTypeMap != null) {
+ majorType = baselineTypeMap.get(parsePath(baselineColumns[i]));
+ } else {
+ throw new Exception("Type information not set for interpreting csv baseline file.");
+ }
+ String precision = getDecimalPrecisionScaleInfo(majorType);
+ // TODO - determine if there is a better behavior here; if we do not specify a length, the default behavior is
+ // to cast to varchar with length 1
+ // set a default cast size for varchar; the cast function will take the lesser of this passed value and the
+ // length of the incoming data when choosing the length for the outgoing data
+ if (majorType.getMinorType() == TypeProtos.MinorType.VARCHAR ||
+ majorType.getMinorType() == TypeProtos.MinorType.VARBINARY) {
+ precision = "(65000)";
+ }
+ aliasedExpectedColumns[i] = "cast(" + aliasedExpectedColumns[i] + " as " +
+ Types.getNameOfMinorType(majorType.getMinorType()) + precision + " ) " + baselineColumns[i];
+ }
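+ // the validation query assembled below reads the baseline file through the classpath storage plugin and casts
+ // each positional column to the requested type; with hypothetical columns and file it looks roughly like:
+ //   select cast(columns[0] as bigint) employee_id, cast(columns[1] as varchar(65000)) first_name from cp.`testframework/data.tsv`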
+ String query = "select " + Joiner.on(", ").join(aliasedExpectedColumns) + " from cp.`" + baselineFilePath + "`";
+ return query;
+ }
+
+ protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ return UserBitShared.QueryType.SQL;
+ }
+
+ }
+
+ public class JSONTestBuilder extends TestBuilder {
+
+ // path to the baseline file that will be inserted into the validation query
+ private String baselineFilePath;
+
+ JSONTestBuilder(String baselineFile, BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered,
+ boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
+ String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
+ super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
+ highPerformanceComparison);
+ this.baselineFilePath = baselineFile;
+ this.baselineColumns = new String[] {"*"};
+ }
+
+ String getValidationQuery() {
+ return "select " + Joiner.on(", ").join(baselineColumns) + " from cp.`" + baselineFilePath + "`";
+ }
+
+ protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ return UserBitShared.QueryType.SQL;
+ }
+
+ }
+
+ public class BaselineQueryTestBuilder extends TestBuilder {
+
+ private String baselineQuery;
+ private UserBitShared.QueryType baselineQueryType;
+
+ BaselineQueryTestBuilder(String baselineQuery, UserBitShared.QueryType baselineQueryType, BufferAllocator allocator,
+ String query, UserBitShared.QueryType queryType, Boolean ordered,
+ boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap,
+ String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) {
+ super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries,
+ highPerformanceComparison);
+ this.baselineQuery = baselineQuery;
+ this.baselineQueryType = baselineQueryType;
+ }
+
+ String getValidationQuery() {
+ return baselineQuery;
+ }
+
+ protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ return baselineQueryType;
+ }
+
+ // This currently assumes that all explicit baseline queries will have fully qualified type information.
+ // If this changes, the baseline query can be run in a sub-query with the implicit or explicit type passing
+ // added on top of it, as is currently done when reading a baseline file.
+ boolean typeInfoSet() {
+ return true;
+ }
+
+ }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestTestFramework.java b/exec/java-exec/src/test/java/org/apache/drill/TestTestFramework.java
new file mode 100644
index 000000000..eaaae6b0f
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestTestFramework.java
@@ -0,0 +1,397 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.drill;
+
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.MinorType;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.util.JsonStringArrayList;
+import org.apache.drill.exec.util.JsonStringHashMap;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.math.BigDecimal;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+// TODO - update framework to remove any dependency on the Drill engine for reading baseline result sets.
+// Currently using it with the assumption that the csv and json readers are well tested; handling diverse
+// types in the test framework would require doing some redundant work to enable casting outside of Drill, or
+// some better tooling to generate parquet files that have all of the parquet types
+public class TestTestFramework extends BaseTestQuery{
+
+ private static String CSV_COLS = " cast(columns[0] as bigint) employee_id, columns[1] as first_name, columns[2] as last_name ";
+
+ @Test
+ public void testCSVVerification() throws Exception {
+ testBuilder()
+ .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
+ .ordered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
+ }
+
+ @Test
+ public void testBaselineValsVerification() throws Exception {
+ testBuilder()
+ .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json` limit 1")
+ .ordered()
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .baselineValues(12l, "Jewel", "Creek")
+ .build().run();
+
+ testBuilder()
+ .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json` limit 1")
+ .unOrdered()
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .baselineValues(12l, "Jewel", "Creek")
+ .build().run();
+ }
+
+  @Ignore("DRILL-1737")
+ @Test
+ public void testDecimalBaseline() throws Exception {
+ // type information can be provided explicitly
+ testBuilder()
+ .sqlQuery("select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`")
+ .unOrdered()
+ .csvBaselineFile("testframework/decimal_test.tsv")
+ .baselineTypes(Types.withScaleAndPrecision(TypeProtos.MinorType.DECIMAL38SPARSE, TypeProtos.DataMode.REQUIRED, 38, 2))
+ .baselineColumns("dec_col")
+ .build().run();
+
+ // TODO - re-enable once DRILL-1737 is fixed
+    // type information can also be left out; this will prompt the result types of the test query to drive the
+    // interpretation of the test file
+// testBuilder()
+// .sqlQuery("select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`")
+// .unOrdered()
+// .csvBaselineFile("testframework/decimal_test.tsv")
+// .baselineColumns("dec_col")
+// .build().run();
+
+    // Alternatively, explicit values can be provided to the builder itself to avoid going through the Drill
+    // engine at all to populate the baseline results
+ testBuilder()
+ .sqlQuery("select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`")
+ .unOrdered()
+ .baselineColumns("dec_col")
+ .baselineValues(new BigDecimal("3.70"))
+ .build().run();
+
+ }
+
+ @Test
+ public void testBaselineValsVerificationWithNulls() throws Exception {
+ testBuilder()
+ .sqlQuery("select * from cp.`store/json/json_simple_with_null.json`")
+ .ordered()
+ .baselineColumns("a", "b")
+ .baselineValues(5l, 10l)
+ .baselineValues(7l, null)
+ .baselineValues(null, null)
+ .baselineValues(9l, 11l)
+ .build().run();
+
+ testBuilder()
+ .sqlQuery("select * from cp.`store/json/json_simple_with_null.json`")
+ .unOrdered()
+ .baselineColumns("a", "b")
+ .baselineValues(5l, 10l)
+ .baselineValues(9l, 11l)
+ .baselineValues(7l, null)
+ .baselineValues(null, null)
+ .build().run();
+ }
+
+ @Test
+ public void testBaselineValsVerificationWithComplexAndNulls() throws Exception {
+ JsonStringArrayList list = new JsonStringArrayList();
+ JsonStringArrayList innerList1 = new JsonStringArrayList();
+ innerList1.add(2l);
+ innerList1.add(1l);
+ JsonStringArrayList innerList2 = new JsonStringArrayList();
+ innerList2.add(4l);
+ innerList2.add(6l);
+ list.add(innerList1);
+ list.add(innerList2);
+
+ JsonStringArrayList l_list = new JsonStringArrayList();
+ l_list.add(4l);
+ l_list.add(2l);
+
+ JsonStringHashMap x = new JsonStringHashMap();
+ x.put("y", "kevin");
+ x.put("z", "paul");
+
+ // [{"orange":"yellow","pink":"red"},{"pink":"purple"}]
+ JsonStringArrayList z = new JsonStringArrayList();
+ JsonStringHashMap z_1 = new JsonStringHashMap();
+ z_1.put("orange", "yellow");
+ z_1.put("pink", "red");
+
+ JsonStringHashMap z_2 = new JsonStringHashMap();
+ z_2.put("pink", "purple");
+ z.add(z_1);
+ z.add(z_2);
+
+ testBuilder()
+ .sqlQuery("select * from cp.`/jsoninput/input2.json` limit 1")
+ .ordered()
+ .baselineColumns("integer", "float", "x", "z", "l", "rl")
+ .baselineValues(2010l, 17.4, x, z, l_list, list)
+ .build().run();
+ }
+
+ @Test
+ public void testCSVVerification_missing_records_fails() throws Exception {
+ try {
+ testBuilder()
+ .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
+ .ordered()
+ .csvBaselineFile("testframework/small_test_data_extra.tsv")
+ .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
+ } catch (AssertionError ex) {
+ assertEquals("Incorrect number of rows returned by query. expected:<7> but was:<5>", ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure on missing records.");
+ }
+
+ @Test
+ public void testCSVVerification_extra_records_fails() throws Exception {
+ try {
+ testBuilder()
+ .sqlQuery("select " + CSV_COLS + " from cp.`testframework/small_test_data_extra.tsv`")
+ .ordered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
+ } catch (AssertionError ex) {
+ assertEquals("Incorrect number of rows returned by query. expected:<5> but was:<7>", ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure for extra records.");
+ }
+
+ @Test
+ public void testCSVVerification_extra_column_fails() throws Exception {
+ try {
+ testBuilder()
+ .sqlQuery("select " + CSV_COLS + ", columns[3] as address from cp.`testframework/small_test_data_extra_col.tsv`")
+ .ordered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
+ } catch (AssertionError ex) {
+ assertEquals("Unexpected extra column `address` returned by query.", ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure on extra column.");
+ }
+
+ @Test
+ public void testCSVVerification_missing_column_fails() throws Exception {
+ try {
+ testBuilder()
+ .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
+ .ordered()
+ .csvBaselineFile("testframework/small_test_data_extra_col.tsv")
+ .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
+ .baselineColumns("employee_id", "first_name", "last_name", "address")
+ .build().run();
+ } catch (Exception ex) {
+ assertEquals("Expected column(s) `address`, not found in result set.", ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure on missing column.");
+ }
+
+ @Test
+ public void testCSVVerificationOfTypes() throws Throwable {
+ try {
+ testBuilder()
+ .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
+ .ordered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineTypes(TypeProtos.MinorType.INT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
+ } catch (Exception ex) {
+ assertEquals("at position 0 column '`employee_id`' mismatched values, expected: 12(Integer) but received 12(Long)", ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure on type check.");
+ }
+
+ @Test
+ public void testCSVVerificationOfOrder_checkFailure() throws Throwable {
+ try {
+ testBuilder()
+ .sqlQuery("select columns[0] as employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
+ .ordered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
+ } catch (Exception ex) {
+ assertEquals("at position 0 column '`first_name`' mismatched values, expected: Jewel(String) but received Peggy(String)", ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure on order check.");
+ }
+
+ @Test
+ public void testCSVVerificationOfUnorderedComparison() throws Throwable {
+ testBuilder()
+ .sqlQuery("select columns[0] as employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
+ .unOrdered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .build().run();
+ }
+
+  // TODO - enable more advanced type handling for JSON; currently only basic support works.
+  // Add support for type information taken from the test query, or for explicit type expectations.
+ @Test
+ public void testBasicJSON() throws Exception {
+ testBuilder()
+ .sqlQuery("select * from cp.`scan_json_test_3.json`")
+ .ordered()
+ .jsonBaselineFile("/scan_json_test_3.json")
+ .build().run();
+
+ testBuilder()
+ .sqlQuery("select * from cp.`scan_json_test_3.json`")
+ .unOrdered() // Check other verification method with same files
+ .jsonBaselineFile("/scan_json_test_3.json")
+ .build().run();
+ }
+
+ @Test
+ public void testComplexJSON_all_text() throws Exception {
+ testBuilder()
+ .sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
+ .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
+ .ordered()
+ .jsonBaselineFile("store/json/schema_change_int_to_string.json")
+ .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true")
+ .build().run();
+
+ testBuilder()
+ .sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
+ .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
+ .unOrdered() // Check other verification method with same files
+ .jsonBaselineFile("store/json/schema_change_int_to_string.json")
+ .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true")
+ .build().run();
+ test("alter system set `store.json.all_text_mode` = false");
+ }
+
+ @Test
+ public void testRepeatedColumnMatching() throws Exception {
+ try {
+ testBuilder()
+ .sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
+ .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
+ .ordered()
+ .jsonBaselineFile("testframework/schema_change_int_to_string_non-matching.json")
+ .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true")
+ .build().run();
+ } catch (Exception ex) {
+ assertEquals("at position 1 column '`field_1`' mismatched values, " +
+ "expected: [\"5\",\"2\",\"3\",\"4\",\"1\",\"2\"](JsonStringArrayList) but received [\"5\"](JsonStringArrayList)",
+ ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+    throw new Exception("Test framework verification failed, expected failure on repeated column comparison.");
+ }
+
+ @Test
+ public void testEmptyResultSet() throws Exception {
+ testBuilder()
+ .sqlQuery("select * from cp.`store/json/json_simple_with_null.json` where 1=0")
+ .expectsEmptyResultSet()
+ .build().run();
+ try {
+ testBuilder()
+ .sqlQuery("select * from cp.`store/json/json_simple_with_null.json`")
+ .expectsEmptyResultSet()
+ .build().run();
+ } catch (AssertionError ex) {
+ assertEquals("Different number of records returned expected:<4> but was:<0>", ex.getMessage());
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure on unexpected records.");
+ }
+
+ @Test
+ public void testCSVVerificationTypeMap() throws Throwable {
+ Map<SchemaPath, TypeProtos.MajorType> typeMap = new HashMap<>();
+ typeMap.put(TestBuilder.parsePath("first_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+ typeMap.put(TestBuilder.parsePath("employee_id"), Types.optional(TypeProtos.MinorType.INT));
+ typeMap.put(TestBuilder.parsePath("last_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+ testBuilder()
+ .sqlQuery("select cast(columns[0] as int) employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
+ .unOrdered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineColumns("employee_id", "first_name", "last_name")
+        // This should work without the baselineTypes() call below because default type casts are added based on the
+        // types that come out of the test query. To enforce strict typing, pass type information either as a CSV
+        // baseline with a list of types, or for any format as a Map like the one constructed above (call commented out below).
+        //.baselineTypes(typeMap)
+ .build().run();
+
+ typeMap.clear();
+ typeMap.put(TestBuilder.parsePath("first_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+ // This is the wrong type intentionally to ensure failures happen when expected
+ typeMap.put(TestBuilder.parsePath("employee_id"), Types.optional(TypeProtos.MinorType.VARCHAR));
+ typeMap.put(TestBuilder.parsePath("last_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+
+ try {
+ testBuilder()
+ .sqlQuery("select cast(columns[0] as int) employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
+ .unOrdered()
+ .csvBaselineFile("testframework/small_test_data.tsv")
+ .baselineColumns("employee_id", "first_name", "last_name")
+ .baselineTypes(typeMap)
+ .build().run();
+ } catch (Exception ex) {
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Test framework verification failed, expected failure on type check.");
+ }
+
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/HyperVectorValueIterator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/HyperVectorValueIterator.java
new file mode 100644
index 000000000..d214b7c68
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/HyperVectorValueIterator.java
@@ -0,0 +1,98 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.drill.exec;
+
+import org.apache.drill.exec.record.HyperVectorWrapper;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.ValueVector;
+
+import java.util.Iterator;
+
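+/**
+ * Iterates over the values of a single column stored across the individual vectors of a
+ * {@link HyperVectorWrapper}, reading each underlying vector in order. {@link #determineTotalSize()}
+ * must be called once all vectors have been added so that {@link #getTotalRecords()} reports the
+ * correct count; an optional record limit caps how many values the iterator will return.
+ */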
+public class HyperVectorValueIterator implements Iterator<Object> {
+ private MaterializedField mf;
+ private HyperVectorWrapper hyperVector;
+ private int indexInVectorList;
+ private int indexInCurrentVector;
+ private ValueVector currVec;
+ private long totalValues;
+ private long totalValuesRead;
+ // limit how many values will be read out of this iterator
+ private long recordLimit;
+
+ public HyperVectorValueIterator(MaterializedField mf, HyperVectorWrapper hyperVector) {
+ this.mf = mf;
+ this.hyperVector = hyperVector;
+ this.totalValues = 0;
+ this.indexInCurrentVector = 0;
+ this.indexInVectorList = 0;
+ this.recordLimit = -1;
+ }
+
+ public void setRecordLimit(long limit) {
+ this.recordLimit = limit;
+ }
+
+ public HyperVectorWrapper getHyperVector() {
+ return hyperVector;
+ }
+
+ public long getTotalRecords() {
+ if (recordLimit > 0) {
+ return recordLimit;
+ } else {
+ return totalValues;
+ }
+ }
+
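+  // Sums the value counts of the underlying vectors; call after all vectors have been added.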
+ public void determineTotalSize() {
+ for (ValueVector vv : hyperVector.getValueVectors()) {
+ this.totalValues += vv.getAccessor().getValueCount();
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (totalValuesRead == recordLimit) {
+ return false;
+ }
+ if (indexInVectorList < hyperVector.getValueVectors().length) {
+ return true;
+ } else if ( indexInCurrentVector < currVec.getAccessor().getValueCount()) {
+ return true;
+ }
+ return false;
+ }
+
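+  // Lazily advances to the next underlying vector once all values in the current one have been read.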
+ @Override
+ public Object next() {
+    if (currVec == null || indexInCurrentVector == currVec.getAccessor().getValueCount()) {
+ currVec = hyperVector.getValueVectors()[indexInVectorList];
+ indexInVectorList++;
+ indexInCurrentVector = 0;
+ }
+ Object obj = currVec.getAccessor().getObject(indexInCurrentVector);
+ indexInCurrentVector++;
+ totalValuesRead++;
+ return obj;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index ac5673db8..6f32847aa 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -30,6 +30,7 @@ import java.util.Map;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.HyperVectorValueIterator;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.exec.record.BatchSchema;
@@ -53,8 +54,6 @@ public class TestParquetWriter extends BaseTestQuery {
static FileSystem fs;
- private static final boolean VERBOSE_DEBUG = false;
-
@BeforeClass
public static void initFs() throws Exception {
Configuration conf = new Configuration();
@@ -90,17 +89,9 @@ public class TestParquetWriter extends BaseTestQuery {
"L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, cast(L_COMMITDATE as DATE) as COMMITDATE, cast(L_RECEIPTDATE as DATE) AS RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT";
String validationSelection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, " +
"L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE,COMMITDATE ,RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT";
+
String inputTable = "cp.`tpch/lineitem.parquet`";
- String query = String.format("SELECT %s FROM %s", selection, inputTable);
- List<QueryResultBatch> expected = testSqlWithResults(query);
- BatchSchema schema = null;
- RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
- List<Map> expectedRecords = new ArrayList<>();
- // read the data out of the results, the error manifested itself upon call of getObject on the vectors as they had contained deadbufs
- addToMaterializedResults(expectedRecords, expected, loader, schema);
- for (QueryResultBatch result : expected) {
- result.release();
- }
+ runTestAndValidate(selection, validationSelection, inputTable, "drill_929");
}
@Test
@@ -184,7 +175,6 @@ public class TestParquetWriter extends BaseTestQuery {
}
@Test
- @Ignore
public void testRepeatedBool() throws Exception {
String inputTable = "cp.`parquet/repeated_bool_data.json`";
runTestAndValidate("*", "*", inputTable, "repeated_bool_parquet");
@@ -208,11 +198,9 @@ public class TestParquetWriter extends BaseTestQuery {
@Test
public void testMulipleRowGroups() throws Exception {
try {
- //test(String.format("ALTER SESSION SET `%s` = %d", ExecConstants.PARQUET_BLOCK_SIZE, 1*1024*1024));
+ test(String.format("ALTER SESSION SET `%s` = %d", ExecConstants.PARQUET_BLOCK_SIZE, 1*1024*1024));
String selection = "mi";
String inputTable = "cp.`customer.json`";
- int count = testRunAndPrint(UserBitShared.QueryType.SQL, "select mi from cp.`customer.json`");
- System.out.println(count);
runTestAndValidate(selection, selection, inputTable, "foodmart_customer_parquet");
} finally {
test(String.format("ALTER SESSION SET `%s` = %d", ExecConstants.PARQUET_BLOCK_SIZE, 512*1024*1024));
@@ -228,77 +216,29 @@ public class TestParquetWriter extends BaseTestQuery {
runTestAndValidate(selection, validateSelection, inputTable, "foodmart_employee_parquet");
}
- public void compareParquetReaders(String selection, String table) throws Exception {
- test("alter system set `store.parquet.use_new_reader` = true");
- List<QueryResultBatch> expected = testSqlWithResults("select " + selection + " from " + table);
-
- RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
- BatchSchema schema = null;
-
- List<Map> expectedRecords = new ArrayList<>();
- addToMaterializedResults(expectedRecords, expected, loader, schema);
-
- test("alter system set `store.parquet.use_new_reader` = false");
- List<QueryResultBatch> results = testSqlWithResults("select " + selection + " from " + table);
-
- List<Map> actualRecords = new ArrayList<>();
- addToMaterializedResults(actualRecords, results, loader, schema);
- compareResults(expectedRecords, actualRecords);
- for (QueryResultBatch result : results) {
- result.release();
- }
- for (QueryResultBatch result : expected) {
- result.release();
- }
- }
-
public void compareParquetReadersColumnar(String selection, String table) throws Exception {
- test("alter system set `store.parquet.use_new_reader` = true");
- List<QueryResultBatch> expected = testSqlWithResults("select " + selection + " from " + table);
-
- RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
- BatchSchema schema = null;
-
- Map<String, List> expectedSuperVectors = addToCombinedVectorResults(expected, loader, schema);
-
- test("alter system set `store.parquet.use_new_reader` = false");
- List<QueryResultBatch> results = testSqlWithResults("select " + selection + " from " + table);
+ String query = "select " + selection + " from " + table;
+ testBuilder()
+ .ordered()
+ .sqlQuery(query)
+ .optionSettingQueriesForTestQuery("alter system set `store.parquet.use_new_reader` = false")
+ .sqlBaselineQuery(query)
+ .optionSettingQueriesForBaseline("alter system set `store.parquet.use_new_reader` = true")
+ .build().run();
- Map<String, List> actualSuperVectors = addToCombinedVectorResults(results, loader, schema);
- compareMergedVectors(expectedSuperVectors, actualSuperVectors);
- for (QueryResultBatch result : results) {
- result.release();
- }
- for (QueryResultBatch result : expected) {
- result.release();
- }
}
public void compareParquetReadersHyperVector(String selection, String table) throws Exception {
- RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
- BatchSchema schema = null;
- // TODO - It didn't seem to respect the max width per node setting, so I went in and modified the SimpleParalellizer directly.
- // I backed out the changes after the test passed.
-// test("alter system set `planner.width.max_per_node` = 1");
- test("alter system set `store.parquet.use_new_reader` = false");
String query = "select " + selection + " from " + table;
- List<QueryResultBatch> results = testSqlWithResults(query);
-
- Map<String, HyperVectorValueIterator> actualSuperVectors = addToHyperVectorMap(results, loader, schema);
-
- test("alter system set `store.parquet.use_new_reader` = true");
- List<QueryResultBatch> expected = testSqlWithResults(query);
-
- Map<String, HyperVectorValueIterator> expectedSuperVectors = addToHyperVectorMap(expected, loader, schema);
-
- compareHyperVectors(expectedSuperVectors, actualSuperVectors);
- for (QueryResultBatch result : results) {
- result.release();
- }
- for (QueryResultBatch result : expected) {
- result.release();
- }
+ testBuilder()
+ .ordered()
+ .highPerformanceComparison()
+ .sqlQuery(query)
+ .optionSettingQueriesForTestQuery("alter system set `store.parquet.use_new_reader` = false")
+ .sqlBaselineQuery(query)
+ .optionSettingQueriesForBaseline("alter system set `store.parquet.use_new_reader` = true")
+ .build().run();
}
@Ignore
@@ -394,312 +334,10 @@ public class TestParquetWriter extends BaseTestQuery {
String validateQuery = String.format("SELECT %s FROM " + outputFile, validationSelection);
test(create);
- RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
- BatchSchema schema = null;
-
- List<QueryResultBatch> expected = testSqlWithResults(query);
- List<Map> expectedRecords = new ArrayList<>();
- addToMaterializedResults(expectedRecords, expected, loader, schema);
-
- List<QueryResultBatch> results = testSqlWithResults(validateQuery);
- List<Map> actualRecords = new ArrayList<>();
- addToMaterializedResults(actualRecords, results, loader, schema);
-
- compareResults(expectedRecords, actualRecords);
- for (QueryResultBatch result : results) {
- result.release();
- }
- for (QueryResultBatch result : expected) {
- result.release();
- }
- }
-
- public void compareHyperVectors(Map<String, HyperVectorValueIterator> expectedRecords,
- Map<String, HyperVectorValueIterator> actualRecords) throws Exception {
- for (String s : expectedRecords.keySet()) {
- assertEquals(expectedRecords.get(s).getTotalRecords(), actualRecords.get(s).getTotalRecords());
- HyperVectorValueIterator expectedValues = expectedRecords.get(s);
- HyperVectorValueIterator actualValues = actualRecords.get(s);
- int i = 0;
- while (expectedValues.hasNext()) {
- compareValues(expectedValues.next(), actualValues.next(), i, s);
- i++;
- }
- }
- for (HyperVectorValueIterator hvi : expectedRecords.values()) {
- for (ValueVector vv : hvi.hyperVector.getValueVectors()) {
- vv.clear();
- }
- }
- for (HyperVectorValueIterator hvi : actualRecords.values()) {
- for (ValueVector vv : hvi.hyperVector.getValueVectors()) {
- vv.clear();
- }
- }
- }
-
- public void compareMergedVectors(Map<String, List> expectedRecords, Map<String, List> actualRecords) throws Exception {
- for (String s : expectedRecords.keySet()) {
- assertEquals(expectedRecords.get(s).size(), actualRecords.get(s).size());
- List expectedValues = expectedRecords.get(s);
- List actualValues = actualRecords.get(s);
- for (int i = 0; i < expectedValues.size(); i++) {
- compareValues(expectedValues.get(i), actualValues.get(i), i, s);
- }
- }
- }
-
- public Map<String, HyperVectorValueIterator> addToHyperVectorMap(List<QueryResultBatch> records, RecordBatchLoader loader,
- BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException {
- // TODO - this does not handle schema changes
- Map<String, HyperVectorValueIterator> combinedVectors = new HashMap();
-
- long totalRecords = 0;
- QueryResultBatch batch;
- int size = records.size();
- for (int i = 0; i < size; i++) {
- batch = records.get(i);
- loader = new RecordBatchLoader(getAllocator());
- loader.load(batch.getHeader().getDef(), batch.getData());
- logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
- totalRecords += loader.getRecordCount();
- for (VectorWrapper w : loader) {
- String field = w.getField().toExpr();
- if ( ! combinedVectors.containsKey(field)) {
- MaterializedField mf = w.getField();
- ValueVector[] vvList = (ValueVector[]) Array.newInstance(mf.getValueClass(), 1);
- vvList[0] = w.getValueVector();
- combinedVectors.put(mf.getPath().toExpr(), new HyperVectorValueIterator(mf, new HyperVectorWrapper(mf,
- vvList)));
- } else {
- combinedVectors.get(field).hyperVector.addVector(w.getValueVector());
- }
-
- }
- }
- for (HyperVectorValueIterator hvi : combinedVectors.values()) {
- hvi.determineTotalSize();
- }
- return combinedVectors;
- }
-
- public Map<String, List> addToCombinedVectorResults(List<QueryResultBatch> records, RecordBatchLoader loader,
- BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException {
- // TODO - this does not handle schema changes
- Map<String, List> combinedVectors = new HashMap();
-
- long totalRecords = 0;
- QueryResultBatch batch;
- int size = records.size();
- for (int i = 0; i < size; i++) {
- batch = records.get(0);
- loader.load(batch.getHeader().getDef(), batch.getData());
- if (schema == null) {
- schema = loader.getSchema();
- for (MaterializedField mf : schema) {
- combinedVectors.put(mf.getPath().toExpr(), new ArrayList());
- }
- }
- logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
- totalRecords += loader.getRecordCount();
- for (VectorWrapper w : loader) {
- String field = w.getField().toExpr();
- for (int j = 0; j < loader.getRecordCount(); j++) {
- if (totalRecords - loader.getRecordCount() + j > 5000000) {
- continue;
- }
- Object obj = w.getValueVector().getAccessor().getObject(j);
- if (obj != null) {
- if (obj instanceof Text) {
- obj = obj.toString();
- if (obj.equals("")) {
- System.out.println(w.getField());
- }
- }
- else if (obj instanceof byte[]) {
- obj = new String((byte[]) obj, "UTF-8");
- }
- }
- combinedVectors.get(field).add(obj);
- }
- }
- records.remove(0);
- batch.release();
- loader.clear();
- }
- return combinedVectors;
- }
-
- public static class HyperVectorValueIterator implements Iterator<Object>{
- private MaterializedField mf;
- HyperVectorWrapper hyperVector;
- private int indexInVectorList;
- private int indexInCurrentVector;
- private ValueVector currVec;
- private long totalValues;
- private long totalValuesRead;
- // limit how many values will be read out of this iterator
- private long recordLimit;
-
- public HyperVectorValueIterator(MaterializedField mf, HyperVectorWrapper hyperVector) {
- this.mf = mf;
- this.hyperVector = hyperVector;
- this.totalValues = 0;
- this.indexInCurrentVector = 0;
- this.indexInVectorList = 0;
- this.recordLimit = -1;
- }
-
- public void setRecordLimit(long limit) {
- this.recordLimit = limit;
- }
-
- public long getTotalRecords() {
- if (recordLimit > 0) {
- return recordLimit;
- } else {
- return totalValues;
- }
- }
-
- public void determineTotalSize() {
- for (ValueVector vv : hyperVector.getValueVectors()) {
- this.totalValues += vv.getAccessor().getValueCount();
- }
- }
-
- @Override
- public boolean hasNext() {
- if (totalValuesRead == recordLimit) {
- return false;
- }
- if (indexInVectorList < hyperVector.getValueVectors().length) {
- return true;
- } else if ( indexInCurrentVector < currVec.getAccessor().getValueCount()) {
- return true;
- }
- return false;
- }
-
- @Override
- public Object next() {
- if (currVec == null || indexInCurrentVector == currVec.getValueCapacity()) {
- currVec = hyperVector.getValueVectors()[indexInVectorList];
- indexInVectorList++;
- indexInCurrentVector = 0;
- }
- Object obj = currVec.getAccessor().getObject(indexInCurrentVector);
- indexInCurrentVector++;
- totalValuesRead++;
- return obj;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-
- public void addToMaterializedResults(List<Map> materializedRecords, List<QueryResultBatch> records, RecordBatchLoader loader,
- BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException {
- long totalRecords = 0;
- QueryResultBatch batch;
- int size = records.size();
- for (int i = 0; i < size; i++) {
- batch = records.get(0);
- loader.load(batch.getHeader().getDef(), batch.getData());
- if (schema == null) {
- schema = loader.getSchema();
- }
- logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
- totalRecords += loader.getRecordCount();
- for (int j = 0; j < loader.getRecordCount(); j++) {
- HashMap<String, Object> record = new HashMap<>();
- for (VectorWrapper w : loader) {
- Object obj = w.getValueVector().getAccessor().getObject(j);
- if (obj != null) {
- if (obj instanceof Text) {
- obj = obj.toString();
- if (obj.equals("")) {
- System.out.println(w.getField());
- }
- }
- else if (obj instanceof byte[]) {
- obj = new String((byte[]) obj, "UTF-8");
- }
- record.put(w.getField().toExpr(), obj);
- }
- record.put(w.getField().toExpr(), obj);
- }
- materializedRecords.add(record);
- }
- records.remove(0);
- batch.release();
- loader.clear();
- }
- }
-
- public void compareValues(Object expected, Object actual, int counter, String column) throws Exception {
-
- if (expected == null) {
- if (actual == null) {
- if (VERBOSE_DEBUG) {
- logger.debug("(1) at position " + counter + " column '" + column + "' matched value: " + expected );
- }
- return;
- } else {
- throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: " + expected + " but received " + actual);
- }
- }
- if (actual == null) {
- throw new Exception("unexpected null at position " + counter + " column '" + column + "' should have been: " + expected);
- }
- if (actual instanceof byte[]) {
- if ( ! Arrays.equals((byte[]) expected, (byte[]) actual)) {
- throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: "
- + new String((byte[])expected, "UTF-8") + " but received " + new String((byte[])actual, "UTF-8"));
- } else {
- if (VERBOSE_DEBUG) {
- logger.debug("at position " + counter + " column '" + column + "' matched value " + new String((byte[])expected, "UTF-8"));
- }
- return;
- }
- }
- if (!expected.equals(actual)) {
- throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: " + expected + " but received " + actual);
- } else {
- if (VERBOSE_DEBUG) {
- logger.debug("at position " + counter + " column '" + column + "' matched value: " + expected );
- }
- }
- }
-
- public void compareResults(List<Map> expectedRecords, List<Map> actualRecords) throws Exception {
- Assert.assertEquals("Different number of records returned", expectedRecords.size(), actualRecords.size());
-
- StringBuilder missing = new StringBuilder();
- int i = 0;
- int counter = 0;
- int missmatch;
- for (Map<String, Object> record : expectedRecords) {
- missmatch = 0;
- for (String column : record.keySet()) {
- compareValues(record.get(column), actualRecords.get(i).get(column), counter, column );
- }
- if ( !actualRecords.get(i).equals(record)) {
- System.out.println("mismatch at position " + counter );
- missing.append(missmatch);
- missing.append(",");
- }
-
- counter++;
- if (counter % 100000 == 0 ) {
- System.out.println("checked so far:" + counter);
- }
- i++;
- }
- logger.debug(missing.toString());
- System.out.println(missing);
+ testBuilder()
+ .unOrdered()
+ .sqlQuery(query)
+          .sqlBaselineQuery(validateQuery).build().run();
}
}
diff --git a/exec/java-exec/src/test/resources/sort/testSelectWithLimitOffset.tsv b/exec/java-exec/src/test/resources/sort/testSelectWithLimitOffset.tsv
new file mode 100644
index 000000000..eb490fe1b
--- /dev/null
+++ b/exec/java-exec/src/test/resources/sort/testSelectWithLimitOffset.tsv
@@ -0,0 +1,5 @@
+12 Jewel Creek
+13 Peggy Medina
+14 Bryan Rutledge
+15 Walter Cavestany
+16 Peggy Planck \ No newline at end of file
diff --git a/exec/java-exec/src/test/resources/store/json/json_simple_with_null.json b/exec/java-exec/src/test/resources/store/json/json_simple_with_null.json
new file mode 100644
index 000000000..521727c02
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/json/json_simple_with_null.json
@@ -0,0 +1,16 @@
+{
+ "a" : 5,
+ "b" : 10
+}
+{
+ "a" : 7,
+ "b" : null
+}
+{
+ "a" : null,
+ "b" : null
+}
+{
+ "a" : 9,
+ "b" : 11
+}
diff --git a/exec/java-exec/src/test/resources/testframework/decimal_test.json b/exec/java-exec/src/test/resources/testframework/decimal_test.json
new file mode 100644
index 000000000..36996f3c0
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/decimal_test.json
@@ -0,0 +1,3 @@
+{
+"dec_col" : "3.7"
+} \ No newline at end of file
diff --git a/exec/java-exec/src/test/resources/testframework/decimal_test.tsv b/exec/java-exec/src/test/resources/testframework/decimal_test.tsv
new file mode 100644
index 000000000..548d71365
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/decimal_test.tsv
@@ -0,0 +1 @@
+3.7 \ No newline at end of file
diff --git a/exec/java-exec/src/test/resources/testframework/schema_change_int_to_string_non-matching.json b/exec/java-exec/src/test/resources/testframework/schema_change_int_to_string_non-matching.json
new file mode 100644
index 000000000..3de294c3a
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/schema_change_int_to_string_non-matching.json
@@ -0,0 +1,30 @@
+{
+ "field_1": [1]
+}
+{
+ "field_1": [5,2,3,4,1,2],
+ "field_2": 2,
+ "field_3": {
+ "inner_1" : 2
+ },
+ "field_4" : {
+ "inner_1" : [1,2,3,2,3,4,2,3],
+ "inner_2" : 3,
+ "inner_3" : { "inner_object_field_1" : 2}
+ },
+ "field_5" : [ { "inner_list" : [1, null, 6] }, { "inner_list":[3,8]}, { "inner_list":[12, null, 4, "null", 5]} ]
+}
+{
+ "field_1": [5],
+ "field_2": "A wild string appears!",
+ "field_3": {
+ "inner_1" : 5,
+ "inner_2" : 3,
+ "inner_3" : [ { "inner_object_field_1" : null}, {"inner_object_field_1" : 10} ]
+ },
+ "field_4" : {
+ "inner_1" : [4,5,6],
+ "inner_2" : 3
+ },
+ "field_5" : [ { "inner_list" : [5, null, 6.0, "1234"] }, { "inner_list":[7,8.0, "12341324"], "inner_list_2" : [1,2,2323.443e10, "hello there"]}, { "inner_list":[3,4,5], "inner_list_2" : [10, 11, 12]} ]
+} \ No newline at end of file
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data.json b/exec/java-exec/src/test/resources/testframework/small_test_data.json
new file mode 100644
index 000000000..7f08d256b
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data.json
@@ -0,0 +1,5 @@
+{"employee_id" : 12, "first_name" : "Jewel", "last_name" :"Creek" }
+{"employee_id" : 13, "first_name" : "Peggy" , "last_name" :"Medina" }
+{"employee_id" : 14, "first_name" : "Bryan" , "last_name" :"Rutledge" }
+{"employee_id" : 15, "first_name" : "Walter" , "last_name" :"Cavestany" }
+{"employee_id" : 16, "first_name" : "Peggy" , "last_name" :"Planck" }
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data.tsv
new file mode 100644
index 000000000..1e0b4de33
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data.tsv
@@ -0,0 +1,5 @@
+12 Jewel Creek
+13 Peggy Medina
+14 Bryan Rutledge
+15 Walter Cavestany
+16 Peggy Planck
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_extra.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_extra.tsv
new file mode 100644
index 000000000..40e0f26bc
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_extra.tsv
@@ -0,0 +1,7 @@
+12 Jewel Creek
+13 Peggy Medina
+14 Bryan Rutledge
+15 Walter Cavestany
+16 Peggy Planck
+15 asdf asdklj
+15 qwerty werjhtdl
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_extra_col.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_extra_col.tsv
new file mode 100644
index 000000000..8465a14c6
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_extra_col.tsv
@@ -0,0 +1,5 @@
+12 Jewel Creek 123 fake st.
+13 Peggy Medina 34 Electric Ave.
+14 Bryan Rutledge 6 Sesame St.
+15 Walter Cavestany 8 Wanye Ct., Gotham
+16 Peggy Planck 5 Ran out of creative street names Blvd.
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_reordered.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered.tsv
new file mode 100644
index 000000000..b06bbddb6
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered.tsv
@@ -0,0 +1,5 @@
+16 Peggy Planck
+13 Peggy Medina
+14 Bryan Rutledge
+12 Jewel Creek
+15 Walter Cavestany
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_extra.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_extra.tsv
new file mode 100644
index 000000000..fe4a52408
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_extra.tsv
@@ -0,0 +1,7 @@
+16 Peggy Planck
+13 Peggy Medina
+14 Bryan Rutledge
+12 Jewel Creek
+15 Walter Cavestany
+15 asdf asdklj
+15 qwerty werjhtdl
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_fewer.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_fewer.tsv
new file mode 100644
index 000000000..d7868b0fe
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_fewer.tsv
@@ -0,0 +1,3 @@
+16 Peggy Planck
+13 Peggy Medina
+15 Walter Cavestany
diff --git a/pom.xml b/pom.xml
index f74ad2b6c..4308c2a8b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -173,6 +173,7 @@
<exclude>**/*.sql</exclude>
<exclude>**/git.properties</exclude>
<exclude>**/*.csv</exclude>
+ <exclude>**/*.tsv</exclude>
<exclude>**/*.txt</exclude>
<exclude>**/drill-*.conf</exclude>
<exclude>**/.buildpath</exclude>