22 files changed, 1764 insertions, 394 deletions
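This change introduces a declarative result-verification framework for Drill's unit tests (the new TestBuilder and DrillTestWrapper classes below) and converts several existing tests to it. For orientation, here is a minimal sketch of the API this diff adds, adapted from the TestTestFramework cases later in the patch (the wrapper class name ExampleQueryTest is hypothetical):

import org.apache.drill.common.types.TypeProtos;
import org.junit.Test;

public class ExampleQueryTest extends BaseTestQuery {
  @Test
  public void csvBaselineVerification() throws Exception {
    // Run the test query, then compare its rows in order against a TSV baseline file,
    // casting each baseline column to the listed type before comparison.
    testBuilder()
        .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`")
        .ordered()
        .csvBaselineFile("testframework/small_test_data.tsv")
        .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR)
        .baselineColumns("employee_id", "first_name", "last_name")
        .build().run();
  }
}

build() returns a DrillTestWrapper, and run() executes both the test query and the generated validation query, then compares the materialized results.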
diff --git a/common/src/main/java/org/apache/drill/common/types/Types.java b/common/src/main/java/org/apache/drill/common/types/Types.java index 8ae3edda2..04fec54ac 100644 --- a/common/src/main/java/org/apache/drill/common/types/Types.java +++ b/common/src/main/java/org/apache/drill/common/types/Types.java @@ -18,7 +18,9 @@ package org.apache.drill.common.types; import static org.apache.drill.common.types.TypeProtos.DataMode.REPEATED; +import static org.apache.drill.common.types.TypeProtos.MinorType.*; +import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; @@ -370,6 +372,57 @@ public class Types { } } + public static String getNameOfMinorType(MinorType type) { + switch (type) { + case BIT: + return "bool"; + case TINYINT: + return "tinyint"; + case UINT1: + return "uint1"; + case SMALLINT: + return "smallint"; + case UINT2: + return "uint2"; + case INT: + return "int"; + case UINT4: + return "uint4"; + case BIGINT: + return "bigint"; + case UINT8: + return "uint8"; + case FLOAT4: + return "float"; + case FLOAT8: + return "double"; + case DECIMAL9: + return "decimal"; + case DECIMAL18: + return "decimal"; + case DECIMAL28SPARSE: + return "decimal"; + case DECIMAL38SPARSE: + return "decimal"; + case VARCHAR: + return "varchar"; + case VAR16CHAR: + return "utf16"; + case DATE: + return "date"; + case TIME: + return "time"; + case TIMESTAMP: + return "timestamp"; + case VARBINARY: + return "binary"; + case LATE: + throw new DrillRuntimeException("The late type should never appear in execution or an SQL query, so it does not have a name to refer to it."); + default: + throw new DrillRuntimeException("Unrecognized type " + type); + } + } + public static String toString(MajorType type) { return type != null ? 
"MajorType[" + TextFormat.shortDebugString(type) + "]" : "null"; } diff --git a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json index 4a20beacf..6bf1872ae 100644 --- a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json +++ b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json @@ -47,6 +47,11 @@ extensions: [ "csv" ], delimiter: "," }, + "tsv" : { + type: "text", + extensions: [ "tsv" ], + delimiter: "\t" + }, "json" : { type: "json" }, diff --git a/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java b/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java index 18fe84e03..0de39382b 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java +++ b/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java @@ -18,13 +18,24 @@ package org.apache.drill; import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.lang.reflect.Array; import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.base.Joiner; +import com.google.common.base.Strings; import org.apache.drill.common.config.DrillConfig; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.Types; import org.apache.drill.common.util.TestTools; import org.apache.drill.exec.ExecConstants; import org.apache.drill.exec.ExecTest; @@ -37,7 +48,11 @@ import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.memory.TopLevelAllocator; import org.apache.drill.exec.proto.UserBitShared.QueryId; import org.apache.drill.exec.proto.UserBitShared.QueryType; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.HyperVectorWrapper; +import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.record.RecordBatchLoader; +import org.apache.drill.exec.record.VectorWrapper; import org.apache.drill.exec.rpc.RpcException; import org.apache.drill.exec.rpc.user.ConnectionThrottle; import org.apache.drill.exec.rpc.user.QueryResultBatch; @@ -45,7 +60,10 @@ import org.apache.drill.exec.rpc.user.UserResultsListener; import org.apache.drill.exec.server.Drillbit; import org.apache.drill.exec.server.RemoteServiceSet; import org.apache.drill.exec.util.VectorUtil; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.hadoop.io.Text; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.rules.TestRule; import org.junit.rules.TestWatcher; @@ -54,6 +72,9 @@ import org.junit.runner.Description; import com.google.common.base.Charsets; import com.google.common.io.Resources; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + public class BaseTestQuery extends ExecTest{ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BaseTestQuery.class); @@ -110,12 +131,14 @@ public class BaseTestQuery extends ExecTest{ } } - - protected BufferAllocator getAllocator() { return allocator; } + public TestBuilder testBuilder() { + return new TestBuilder(allocator); + } + @AfterClass public static void closeClient() throws IOException{ if (client != null) { @@ -150,12 +173,12 @@ public class BaseTestQuery extends 
ExecTest{ return testRunAndReturn(QueryType.PHYSICAL, physical); } - protected List<QueryResultBatch> testRunAndReturn(QueryType type, String query) throws Exception{ + public static List<QueryResultBatch> testRunAndReturn(QueryType type, String query) throws Exception{ query = query.replace("[WORKING_PATH]", TestTools.getWorkingPath()); return client.runQuery(type, query); } - protected int testRunAndPrint(QueryType type, String query) throws Exception{ + public static int testRunAndPrint(QueryType type, String query) throws Exception{ query = query.replace("[WORKING_PATH]", TestTools.getWorkingPath()); PrintingResultsListener resultListener = new PrintingResultsListener(client.getConfig(), Format.TSV, VectorUtil.DEFAULT_COLUMN_WIDTH); client.runQuery(type, query, resultListener); @@ -182,7 +205,7 @@ public class BaseTestQuery extends ExecTest{ } } - protected void test(String query) throws Exception{ + public static void test(String query) throws Exception{ String[] queries = query.split(";"); for (String q : queries) { if (q.trim().isEmpty()) { @@ -220,7 +243,7 @@ public class BaseTestQuery extends ExecTest{ test(getFile(file)); } - protected String getFile(String resource) throws IOException{ + public static String getFile(String resource) throws IOException{ URL url = Resources.getResource(resource); if (url == null) { throw new IOException(String.format("Unable to find path %s.", resource)); @@ -309,5 +332,4 @@ public class BaseTestQuery extends ExecTest{ return formattedResults.toString(); } - } diff --git a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java new file mode 100644 index 000000000..f06203e5c --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java @@ -0,0 +1,557 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/
+package org.apache.drill;
+
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.HyperVectorValueIterator;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.proto.UserBitShared.QueryType;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.HyperVectorWrapper;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.RecordBatchLoader;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.rpc.user.QueryResultBatch;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.hadoop.io.Text;
+
+import java.io.UnsupportedEncodingException;
+import java.lang.reflect.Array;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * An object to encapsulate the options for a Drill unit test, as well as the execution methods to perform the tests
+ * and validation of results.
+ *
+ * To construct an instance easily, look at the TestBuilder class. From a subclass of
+ * BaseTestQuery, an instance of the builder is accessible through the testBuilder() method.
+ */
+public class DrillTestWrapper {
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DrillTestWrapper.class);
+
+ // TODO - when in JSON, read baseline in all text mode to avoid precision loss for decimal values
+
+ // This flag enables logging of every value that is validated. For large validations this is time consuming,
+ // so it is not exposed in a way that it can be enabled for an individual test. It can be changed here while
+ // debugging a test to see all of the output, but as this framework does full validation there is no reason
+ // to keep it on; it will only make the tests slower.
+ private static boolean VERBOSE_DEBUG = false;
+
+ // The motivation behind the TestBuilder was to provide a clean API for test writers. The model is mostly designed
+ // to prepare all of the components necessary for running the tests before the TestWrapper is initialized. There is,
+ // however, one case where the setup for the baseline is driven by the test query results: implicit type enforcement
+ // for the baseline data. In that case there needs to be a callback into the TestBuilder once the type information
+ // from the test query is known.
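+
+ // Illustrative sketch (not part of the original patch): inline baselines supplied through the builder
+ // arrive here as the baselineRecords list, so a test such as
+ //
+ //   testBuilder()
+ //       .sqlQuery("select employee_id, first_name, last_name from cp.`employee.json` limit 1")
+ //       .unOrdered()
+ //       .baselineColumns("employee_id", "first_name", "last_name")
+ //       .baselineValues(1l, "Sheri", "Nowmer")  // hypothetical values
+ //       .build().run();
+ //
+ // never runs a baseline query at all; run() below dispatches to the ordered or unordered comparison.
+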
+ private TestBuilder testBuilder;
+ // test query to run
+ private String query;
+ // The type of query provided
+ private UserBitShared.QueryType queryType;
+ // The type of query provided for the baseline
+ private UserBitShared.QueryType baselineQueryType;
+ // should ordering be enforced in the baseline check
+ private boolean ordered;
+ // TODO - implement this
+ private boolean approximateEquality;
+ private BufferAllocator allocator;
+ // queries to run before the baseline or test queries, can be used to set options
+ private String baselineOptionSettingQueries;
+ private String testOptionSettingQueries;
+ // Two different methods are available for comparing ordered results. The default reads all of the records
+ // into giant lists of objects, like one giant on-heap batch of 'vectors'. This flag enables the other approach,
+ // which iterates through a hyper batch for the test query results and the baseline; while this works faster and
+ // uses less memory, it can be harder to debug as all of the elements are not in a single list.
+ private boolean highPerformanceComparison;
+ // For small baselines, test writers can provide the baseline values and columns directly without creating
+ // a file; these are provided to the builder in the baselineValues() and baselineColumns() methods and
+ // translated into a map in the builder.
+ private List<Map> baselineRecords;
+
+ public DrillTestWrapper(TestBuilder testBuilder, BufferAllocator allocator, String query, QueryType queryType,
+ String baselineOptionSettingQueries, String testOptionSettingQueries,
+ QueryType baselineQueryType, boolean ordered, boolean approximateEquality,
+ boolean highPerformanceComparison, List<Map> baselineRecords) {
+ this.testBuilder = testBuilder;
+ this.allocator = allocator;
+ this.query = query;
+ this.queryType = queryType;
+ this.baselineQueryType = baselineQueryType;
+ this.ordered = ordered;
+ this.approximateEquality = approximateEquality;
+ this.baselineOptionSettingQueries = baselineOptionSettingQueries;
+ this.testOptionSettingQueries = testOptionSettingQueries;
+ this.highPerformanceComparison = highPerformanceComparison;
+ this.baselineRecords = baselineRecords;
+ }
+
+ public void run() throws Exception {
+ if (ordered) {
+ compareOrderedResults();
+ } else {
+ compareUnorderedResults();
+ }
+ }
+
+ private BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ private void compareHyperVectors(Map<String, HyperVectorValueIterator> expectedRecords,
+ Map<String, HyperVectorValueIterator> actualRecords) throws Exception {
+ for (String s : expectedRecords.keySet()) {
+ assertNotNull("Expected column '" + s + "' not found.", actualRecords.get(s));
+ assertEquals(expectedRecords.get(s).getTotalRecords(), actualRecords.get(s).getTotalRecords());
+ HyperVectorValueIterator expectedValues = expectedRecords.get(s);
+ HyperVectorValueIterator actualValues = actualRecords.get(s);
+ int i = 0;
+ while (expectedValues.hasNext()) {
+ compareValuesErrorOnMismatch(expectedValues.next(), actualValues.next(), i, s);
+ i++;
+ }
+ }
+ for (HyperVectorValueIterator hvi : expectedRecords.values()) {
+ for (ValueVector vv : hvi.getHyperVector().getValueVectors()) {
+ vv.clear();
+ }
+ }
+ for (HyperVectorValueIterator hvi : actualRecords.values()) {
+ for (ValueVector vv : hvi.getHyperVector().getValueVectors()) {
+ vv.clear();
+ }
+ }
+ }
+
+ private void compareMergedVectors(Map<String, List> expectedRecords, Map<String, List> actualRecords) throws Exception {
+
+ for (String s : actualRecords.keySet())
{ + assertNotNull("Unexpected extra column " + s + " returned by query.", expectedRecords.get(s)); + assertEquals("Incorrect number of rows returned by query.", expectedRecords.get(s).size(), actualRecords.get(s).size()); + List expectedValues = expectedRecords.get(s); + List actualValues = actualRecords.get(s); + assertEquals("Different number of records returned", expectedValues.size(), actualValues.size()); + + for (int i = 0; i < expectedValues.size(); i++) { + compareValuesErrorOnMismatch(expectedValues.get(i), actualValues.get(i), i, s); + } + } + if (actualRecords.size() < expectedRecords.size()) { + throw new Exception(findMissingColumns(expectedRecords.keySet(), actualRecords.keySet())); + } + } + + private Map<String, HyperVectorValueIterator> addToHyperVectorMap(List<QueryResultBatch> records, RecordBatchLoader loader, + BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException { + // TODO - this does not handle schema changes + Map<String, HyperVectorValueIterator> combinedVectors = new HashMap(); + + long totalRecords = 0; + QueryResultBatch batch; + int size = records.size(); + for (int i = 0; i < size; i++) { + batch = records.get(i); + loader = new RecordBatchLoader(getAllocator()); + loader.load(batch.getHeader().getDef(), batch.getData()); + logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords); + totalRecords += loader.getRecordCount(); + for (VectorWrapper w : loader) { + String field = w.getField().toExpr(); + if (!combinedVectors.containsKey(field)) { + MaterializedField mf = w.getField(); + ValueVector[] vvList = (ValueVector[]) Array.newInstance(mf.getValueClass(), 1); + vvList[0] = w.getValueVector(); + combinedVectors.put(mf.getPath().toExpr(), new HyperVectorValueIterator(mf, new HyperVectorWrapper(mf, + vvList))); + } else { + combinedVectors.get(field).getHyperVector().addVector(w.getValueVector()); + } + + } + } + for (HyperVectorValueIterator hvi : combinedVectors.values()) { + hvi.determineTotalSize(); + } + return combinedVectors; + } + + /** + * Only use this method if absolutely needed. There are utility methods to compare results of single queries. + * The current use case for exposing this is setting session or system options between the test and verification + * queries. 
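+ *
+ * For example, session or system options can be set around both the test and baseline runs with the
+ * builder methods added in this patch (this usage appears verbatim in TestTestFramework later in this diff):
+ *
+ *   testBuilder()
+ *       .sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
+ *       .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
+ *       .ordered()
+ *       .jsonBaselineFile("store/json/schema_change_int_to_string.json")
+ *       .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true")
+ *       .build().run();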
+ * + * TODO - evaluate adding an interface to allow setting session and system options before running queries + * @param records + * @param loader + * @param schema + * @return + * @throws SchemaChangeException + * @throws UnsupportedEncodingException + */ + private Map<String, List> addToCombinedVectorResults(List<QueryResultBatch> records, RecordBatchLoader loader, + BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException { + // TODO - this does not handle schema changes + Map<String, List> combinedVectors = new HashMap(); + + long totalRecords = 0; + QueryResultBatch batch; + int size = records.size(); + for (int i = 0; i < size; i++) { + batch = records.get(0); + loader.load(batch.getHeader().getDef(), batch.getData()); + if (schema == null) { + schema = loader.getSchema(); + for (MaterializedField mf : schema) { + combinedVectors.put(mf.getPath().toExpr(), new ArrayList()); + } + } + logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords); + totalRecords += loader.getRecordCount(); + for (VectorWrapper w : loader) { + String field = w.getField().toExpr(); + for (int j = 0; j < loader.getRecordCount(); j++) { + Object obj = w.getValueVector().getAccessor().getObject(j); + if (obj != null) { + if (obj instanceof Text) { + obj = obj.toString(); + if (obj.equals("")) { + System.out.println(w.getField()); + } + } + else if (obj instanceof byte[]) { + obj = new String((byte[]) obj, "UTF-8"); + } + } + combinedVectors.get(field).add(obj); + } + } + records.remove(0); + batch.release(); + loader.clear(); + } + return combinedVectors; + } + + /** + * Use this method only if necessary to validate one query against another. If you are just validating against a + * baseline file use one of the simpler interfaces that will write the validation query for you. + * + * @throws Exception + */ + protected void compareUnorderedResults() throws Exception { + RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); + BatchSchema schema = null; + + BaseTestQuery.test(testOptionSettingQueries); + List<QueryResultBatch> expected = BaseTestQuery.testRunAndReturn(queryType, query); + + addTypeInfoIfMissing(expected.get(0), testBuilder); + + List<Map> expectedRecords = new ArrayList<>(); + addToMaterializedResults(expectedRecords, expected, loader, schema); + + List<QueryResultBatch> results = new ArrayList(); + List<Map> actualRecords = new ArrayList<>(); + // If baseline data was not provided to the test builder directly, we must run a query for the baseline, this includes + // the cases where the baseline is stored in a file. + if (baselineRecords == null) { + BaseTestQuery.test(baselineOptionSettingQueries); + results = BaseTestQuery.testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery()); + addToMaterializedResults(actualRecords, results, loader, schema); + } else { + actualRecords = baselineRecords; + } + + compareResults(expectedRecords, actualRecords); + cleanupBatches(expected, results); + } + + /** + * Use this method only if necessary to validate one query against another. If you are just validating against a + * baseline file use one of the simpler interfaces that will write the validation query for you. 
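+ *
+ * A sketch of validating one query against another (the queries here are illustrative, not from this
+ * patch; sqlBaselineQuery() is the TestBuilder method that ends up in this code path):
+ *
+ *   testBuilder()
+ *       .sqlQuery("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id")
+ *       .ordered()
+ *       .sqlBaselineQuery("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id")
+ *       .build().run();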
+ * + * @throws Exception + */ + protected void compareOrderedResults() throws Exception { + if (highPerformanceComparison) { + if (baselineQueryType != null) { + throw new Exception("Cannot do a high performance comparison without using a baseline file"); + } + compareResultsHyperVector(); + } else { + compareMergedOnHeapVectors(); + } + } + + public void compareMergedOnHeapVectors() throws Exception { + RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); + BatchSchema schema = null; + + BaseTestQuery.test(testOptionSettingQueries); + List<QueryResultBatch> results = BaseTestQuery.testRunAndReturn(queryType, query); + // To avoid extra work for test writers, types can optionally be inferred from the test query + addTypeInfoIfMissing(results.get(0), testBuilder); + + Map<String, List> actualSuperVectors = addToCombinedVectorResults(results, loader, schema); + + List<QueryResultBatch> expected = null; + Map<String, List> expectedSuperVectors = null; + + // If baseline data was not provided to the test builder directly, we must run a query for the baseline, this includes + // the cases where the baseline is stored in a file. + if (baselineRecords == null) { + BaseTestQuery.test(baselineOptionSettingQueries); + expected = BaseTestQuery.testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery()); + expectedSuperVectors = addToCombinedVectorResults(expected, loader, schema); + } else { + // data is built in the TestBuilder in a row major format as it is provided by the user + // translate it here to vectorized, the representation expected by the ordered comparison + expectedSuperVectors = new HashMap(); + expected = new ArrayList(); + for (String s : ((Map<String, Object>)baselineRecords.get(0)).keySet()) { + expectedSuperVectors.put(s, new ArrayList()); + } + for (Map<String, Object> m : baselineRecords) { + for (String s : m.keySet()) { + expectedSuperVectors.get(s).add(m.get(s)); + } + } + } + + compareMergedVectors(expectedSuperVectors, actualSuperVectors); + + cleanupBatches(expected, results); + } + + public void compareResultsHyperVector() throws Exception { + RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); + BatchSchema schema = null; + + BaseTestQuery.test(testOptionSettingQueries); + List<QueryResultBatch> results = BaseTestQuery.testRunAndReturn(queryType, query); + // To avoid extra work for test writers, types can optionally be inferred from the test query + addTypeInfoIfMissing(results.get(0), testBuilder); + + Map<String, HyperVectorValueIterator> actualSuperVectors = addToHyperVectorMap(results, loader, schema); + + BaseTestQuery.test(baselineOptionSettingQueries); + List<QueryResultBatch> expected = BaseTestQuery.testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery()); + + Map<String, HyperVectorValueIterator> expectedSuperVectors = addToHyperVectorMap(expected, loader, schema); + + compareHyperVectors(expectedSuperVectors, actualSuperVectors); + cleanupBatches(results, expected); + } + + private void addTypeInfoIfMissing(QueryResultBatch batch, TestBuilder testBuilder) { + if (! 
testBuilder.typeInfoSet()) {
+ Map<SchemaPath, TypeProtos.MajorType> typeMap = getTypeMapFromBatch(batch);
+ testBuilder.baselineTypes(typeMap);
+ }
+
+ }
+
+ private Map<SchemaPath, TypeProtos.MajorType> getTypeMapFromBatch(QueryResultBatch batch) {
+ Map<SchemaPath, TypeProtos.MajorType> typeMap = new HashMap();
+ for (int i = 0; i < batch.getHeader().getDef().getFieldCount(); i++) {
+ typeMap.put(MaterializedField.create(batch.getHeader().getDef().getField(i)).getPath(),
+ batch.getHeader().getDef().getField(i).getMajorType());
+ }
+ return typeMap;
+ }
+
+ private void cleanupBatches(List<QueryResultBatch>... results) {
+ for (List<QueryResultBatch> resultList : results) {
+ for (QueryResultBatch result : resultList) {
+ result.release();
+ }
+ }
+ }
+
+ protected void addToMaterializedResults(List<Map> materializedRecords, List<QueryResultBatch> records, RecordBatchLoader loader,
+ BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException {
+ long totalRecords = 0;
+ QueryResultBatch batch;
+ int size = records.size();
+ for (int i = 0; i < size; i++) {
+ batch = records.get(0);
+ loader.load(batch.getHeader().getDef(), batch.getData());
+ if (schema == null) {
+ schema = loader.getSchema();
+ }
+ logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords);
+ totalRecords += loader.getRecordCount();
+ for (int j = 0; j < loader.getRecordCount(); j++) {
+ HashMap<String, Object> record = new HashMap<>();
+ for (VectorWrapper w : loader) {
+ Object obj = w.getValueVector().getAccessor().getObject(j);
+ if (obj != null) {
+ if (obj instanceof Text) {
+ obj = obj.toString();
+ if (obj.equals("")) {
+ System.out.println(w.getField());
+ }
+ }
+ else if (obj instanceof byte[]) {
+ obj = new String((byte[]) obj, "UTF-8");
+ }
+ }
+ // store the (possibly null) value for this column; one put covers both cases
+ record.put(w.getField().toExpr(), obj);
+ }
+ materializedRecords.add(record);
+ }
+ records.remove(0);
+ batch.release();
+ loader.clear();
+ }
+ }
+
+ public boolean compareValuesErrorOnMismatch(Object expected, Object actual, int counter, String column) throws Exception {
+ if (compareValues(expected, actual, counter, column)) {
+ return true;
+ }
+ if (expected == null) {
+ throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: null " +
+ "but received " + actual + "(" + actual.getClass().getSimpleName() + ")");
+ }
+ if (actual == null) {
+ throw new Exception("unexpected null at position " + counter + " column '" + column + "' should have been: " + expected);
+ }
+ if (actual instanceof byte[]) {
+ throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: " +
+ new String((byte[])expected, "UTF-8") + " but received " + new String((byte[])actual, "UTF-8"));
+ }
+ if (!expected.equals(actual)) {
+ throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: " +
+ expected + "(" + expected.getClass().getSimpleName() + ") but received " + actual + "(" + actual.getClass().getSimpleName() + ")");
+ }
+ return true;
+ }
+
+ public boolean compareValues(Object expected, Object actual, int counter, String column) throws Exception {
+ if (expected == null) {
+ if (actual == null) {
+ if (VERBOSE_DEBUG) {
+ logger.debug("(1) at position " + counter + " column '" + column + "' matched value: " + expected);
+ }
+ return true;
+ } else {
+ return false;
+ }
+ }
+ if (actual == null) {
+ return false;
+ }
+ if (actual instanceof
byte[]) { + if ( ! Arrays.equals((byte[]) expected, (byte[]) actual)) { + return false; + } else { + if (VERBOSE_DEBUG) { + logger.debug("at position " + counter + " column '" + column + "' matched value " + new String((byte[])expected, "UTF-8")); + } + return true; + } + } + if (!expected.equals(actual)) { + return false; + } else { + if (VERBOSE_DEBUG) { + logger.debug("at position " + counter + " column '" + column + "' matched value: " + expected ); + } + } + return true; + } + + /** + * Compare two result sets, ignoring ordering. + * + * @param expectedRecords - list of records from baseline + * @param actualRecords - list of records from test query, WARNING - this list is destroyed in this method + * @throws Exception + */ + private void compareResults(List<Map> expectedRecords, List<Map> actualRecords) throws Exception { + + assertEquals("Different number of records returned", expectedRecords.size(), actualRecords.size()); + + String missing = ""; + int i = 0; + int counter = 0; + boolean found; + for (Map<String, Object> expectedRecord : expectedRecords) { + i = 0; + found = false; + findMatch: + for (Map<String, Object> actualRecord : actualRecords) { + for (String s : actualRecord.keySet()) { + if (!expectedRecord.containsKey(s)) { + throw new Exception("Unexpected column '" + s + "' returned by query."); + } + if ( ! compareValues(expectedRecord.get(s), actualRecord.get(s), counter, s)) { + i++; + continue findMatch; + } + } + if (actualRecord.size() < expectedRecord.size()) { + throw new Exception(findMissingColumns(expectedRecord.keySet(), actualRecord.keySet())); + } + found = true; + break; + } + if (!found) { + throw new Exception("Did not find expected record in result set: " + printRecord(expectedRecord)); + } else { + actualRecords.remove(i); + counter++; + } + } + logger.debug(missing); + System.out.println(missing); + assertEquals(0, actualRecords.size()); + } + + private String findMissingColumns(Set<String> expected, Set<String> actual) { + String missingCols = ""; + for (String colName : expected) { + if (!actual.contains(colName)) { + missingCols += colName + ", "; + } + } + return "Expected column(s) " + missingCols + " not found in result set."; + } + + private String printRecord(Map<String, Object> record) { + String ret = ""; + for (String s : record.keySet()) { + ret += s + " : " + record.get(s) + ", "; + } + return ret + "\n"; + } + +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java index 57689080e..a554db2a2 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestAltSortQueries.java @@ -41,9 +41,17 @@ public class TestAltSortQueries extends BaseTestQuery{ test("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id limit 5 "); } + // TODO - This is currently passing but I think that it is still in error, + // the new verification for this test was written against the output that was previously not being checked + // It looks like there is an off by one error in the results, see the baseline file for the current results @Test public void testSelectWithLimitOffset() throws Exception{ - test("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id limit 5 offset 10 "); + testBuilder() + .sqlQuery("select employee_id, first_name, last_name from cp.`employee.json` order by employee_id limit 5 offset 10 ") + 
.ordered() + .csvBaselineFile("sort/testSelectWithLimitOffset.tsv") + .baselineColumns("employee_id", "first_name", "last_name") + .build().run(); } @Test diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/TestBuilder.java new file mode 100644 index 000000000..9334c6e84 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/TestBuilder.java @@ -0,0 +1,499 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.apache.drill; + +import com.google.common.base.Joiner; +import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.CommonTokenStream; +import org.antlr.runtime.RecognitionException; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.parser.ExprLexer; +import org.apache.drill.common.expression.parser.ExprParser; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.Types; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.proto.UserBitShared; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class TestBuilder { + + // test query to run + private String query; + // the type of query for the test + private UserBitShared.QueryType queryType; + // should the validation enforce ordering + private Boolean ordered; + private boolean approximateEquality; + private BufferAllocator allocator; + // Used to pass the type information associated with particular column names rather than relying on the + // ordering of the columns in the CSV file, or the default type inferences when reading JSON, this is used for the + // case where results of the test query are adding type casts to the baseline queries, this saves a little bit of + // setup in cases where strict type enforcement is not necessary for a given test + protected Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap; + // queries to run before the baseline or test queries, can be used to set options + private String baselineOptionSettingQueries; + private String testOptionSettingQueries; + // two different methods are available for comparing ordered results, the default reads all of the records + // into giant lists of objects, like one giant on-heap batch of 'vectors' + // this flag enables the other approach which iterates through a hyper batch for the test query results and baseline + // while this does work faster and use less memory, it can be harder to debug as all of the 
elements are not in a + // single list + private boolean highPerformanceComparison; + // for cases where the result set is just a single record, test writers can avoid creating a lot of small baseline + // files by providing a list of baseline values + private Object[] baselineValues; + // column names for use with the baseline values + protected String[] baselineColumns; + // In cases where we need to verify larger datasets without the risk of running the baseline data through + // the drill engine, results can be provided in a list of maps. While this model does make a lot of sense, there is a + // lot of work to make the type handling/casting work correctly, and making robust complex type handling work completely outside + // of the drill engine for generating baselines would likely be more work than it would be worth. For now we will be + // going with an approach of using this facility to validate the parts of the drill engine that could break in ways + // that would affect the reading of baseline files (i.e. we need robust test for storage engines, project and casting that + // use this interface) and then rely on the engine for the rest of the tests that will use the baseline queries. + private List<Map> baselineRecords; + + public TestBuilder(BufferAllocator allocator) { + this.allocator = allocator; + reset(); + } + + public TestBuilder(BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered, + boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap, + String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) { + this(allocator); + if (ordered == null) { + throw new RuntimeException("Ordering not set, when using a baseline file or query you must explicitly call the ordered() or unOrdered() method on the " + this.getClass().getSimpleName()); + } + this.query = query; + this.queryType = queryType; + this.ordered = ordered; + this.approximateEquality = approximateEquality; + this.baselineTypeMap = baselineTypeMap; + this.baselineOptionSettingQueries = baselineOptionSettingQueries; + this.testOptionSettingQueries = testOptionSettingQueries; + this.highPerformanceComparison = highPerformanceComparison; + } + + protected TestBuilder reset() { + query = ""; + ordered = null; + approximateEquality = false; + highPerformanceComparison = false; + testOptionSettingQueries = ""; + baselineOptionSettingQueries = ""; + baselineRecords = null; + return this; + } + + public DrillTestWrapper build() throws Exception { + if ( ! 
ordered && highPerformanceComparison ) { + throw new Exception("High performance comparison only available for ordered checks, to enforce this restriction, ordered() must be called first."); + } + return new DrillTestWrapper(this, allocator, query, queryType, baselineOptionSettingQueries, testOptionSettingQueries, + getValidationQueryType(), ordered, approximateEquality, highPerformanceComparison, baselineRecords); + } + + public TestBuilder sqlQuery(String query) { + this.query = query; + this.queryType = UserBitShared.QueryType.SQL; + return this; + } + + public TestBuilder sqlQueryFromFile(String queryFile) throws IOException { + String query = BaseTestQuery.getFile(queryFile); + this.query = query; + this.queryType = UserBitShared.QueryType.SQL; + return this; + } + + public TestBuilder physicalPlanFromFile(String queryFile) throws IOException { + String query = BaseTestQuery.getFile(queryFile); + this.query = query; + this.queryType = UserBitShared.QueryType.PHYSICAL; + return this; + } + + public TestBuilder ordered() { + this.ordered = true; + return this; + } + + public TestBuilder unOrdered() { + this.ordered = false; + return this; + } + + // this can only be used with ordered verifications, it does run faster and use less memory but may be + // a little harder to debug as it iterates over a hyper batch rather than reading all of the values into + // large on-heap lists + public TestBuilder highPerformanceComparison() throws Exception { + this.highPerformanceComparison = true; + return this; + } + + // list of queries to run before the baseline query, can be used to set several options + // list takes the form of a semi-colon separated list + public TestBuilder optionSettingQueriesForBaseline(String queries) { + this.baselineOptionSettingQueries = queries; + return this; + } + + // list of queries to run before the test query, can be used to set several options + // list takes the form of a semi-colon separated list + public TestBuilder optionSettingQueriesForTestQuery(String queries) { + this.testOptionSettingQueries = queries; + return this; + } + public TestBuilder approximateEquality() { + this.approximateEquality = true; + return this; + } + + // modified code from SchemaPath.De class. This should be used sparingly and only in tests if absolutely needed. + public static SchemaPath parsePath(String path) { + try { + // logger.debug("Parsing expression string '{}'", expr); + ExprLexer lexer = new ExprLexer(new ANTLRStringStream(path)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + ExprParser parser = new ExprParser(tokens); + + //TODO: move functionregistry and error collector to injectables. 
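+ // Note (added for illustration): parsePath turns a column name string such as "employee_id", or a
+ // nested path like "x.y", into a SchemaPath. baselineColumns() runs its arguments through this method
+ // and keeps the toExpr() form (e.g. `employee_id`), which matches the keys DrillTestWrapper builds
+ // from each vector's MaterializedField when materializing query results.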
+ //ctxt.findInjectableValue(valueId, forProperty, beanInstance) + ExprParser.parse_return ret = parser.parse(); + + // ret.e.resolveAndValidate(expr, errorCollector); + if (ret.e instanceof SchemaPath) { + return (SchemaPath) ret.e; + } else { + throw new IllegalStateException("Schema path is not a valid format."); + } + } catch (RecognitionException e) { + throw new RuntimeException(e); + } + } + + String getValidationQuery() throws Exception { + throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query"); + } + + protected UserBitShared.QueryType getValidationQueryType() throws Exception { + if (singleExplicitBaselineRecord()) { + return null; + } + throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query"); + } + + public JSONTestBuilder jsonBaselineFile(String filePath) { + return new JSONTestBuilder(filePath, allocator, query, queryType, ordered, approximateEquality, + baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison); + } + + public CSVTestBuilder csvBaselineFile(String filePath) { + return new CSVTestBuilder(filePath, allocator, query, queryType, ordered, approximateEquality, + baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison); + } + + public TestBuilder baselineTypes(Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap) { + this.baselineTypeMap = baselineTypeMap; + return this; + } + + boolean typeInfoSet() { + if (baselineTypeMap != null) { + return true; + } else { + return false; + } + } + + // indicate that the tests query should be checked for an empty result set + public TestBuilder expectsEmptyResultSet() { + unOrdered(); + baselineRecords = new ArrayList(); + return this; + } + + /** + * This method is used to pass in a simple list of values for a single record verification without + * the need to create a CSV or JSON file to store the baseline. + * + * This can be called repeatedly to pass a list of records to verify. It works for both ordered and unordered + * checks. + * + * @param baselineValues - the baseline values to validate + * @return + */ + public TestBuilder baselineValues(Object ... baselineValues) { + if (ordered == null) { + throw new RuntimeException("Ordering not set, before specifying baseline data you must explicitly call the ordered() or unOrdered() method on the " + this.getClass().getSimpleName()); + } + if (baselineRecords == null) { + baselineRecords = new ArrayList(); + } + Map<String, Object> ret = new HashMap(); + int i = 0; + assertEquals("Must supply the same number of baseline values as columns.", baselineValues.length, baselineColumns.length); + for (String s : baselineColumns) { + ret.put(s, baselineValues[i]); + i++; + } + this.baselineRecords.add(ret); + return this; + } + + /** + * This can be used in cases where we want to avoid issues with the assumptions made by the test framework. + * Most of the methods for verification in the framework run drill queries to generate the read baseline files or + * execute alternative baseline queries. This model relies on basic functionality of reading files with storage + * plugins and applying casts/projects to be stable. + * + * This method can be used to verify the engine for these cases and any other future execution paths that would + * be used by both the test query and baseline. 
Without tests like this it is possible that some tests + * could falsely report as passing, as both the test query and baseline query could run into the same problem + * with an assumed stable code path and produce the same erroneous result. + * + * @param materializedRecords - a list of maps representing materialized results + * @return + */ + public TestBuilder baselineRecords(List<Map> materializedRecords) { + this.baselineRecords = materializedRecords; + return this; + } + + /** + * This setting has a slightly different impact on the test depending on some of the other + * configuration options are set. + * + * If a JSON baseline file is given, this list will act as a project list to verify the + * test query against a subset of the columns in the file. + * + * For a CSV baseline file, these will act as aliases for columns [0 .. n] in the repeated + * varchar column that is read out of CSV. + * + * For a baseline sql query, this currently has no effect. + * + * For explicit baseline values given in java code with the baselineValues() method, these will + * be used to create a map for the one record verification. + */ + public TestBuilder baselineColumns(String... columns) { + for (int i = 0; i < columns.length; i++) { + columns[i] = parsePath(columns[i]).toExpr(); + } + this.baselineColumns = columns; + return this; + } + + private boolean singleExplicitBaselineRecord() { + return baselineRecords != null; + } + + // provide a SQL query to validate against + public BaselineQueryTestBuilder sqlBaselineQuery(String baselineQuery) { + return new BaselineQueryTestBuilder(baselineQuery, UserBitShared.QueryType.SQL, allocator, query, queryType, ordered, approximateEquality, + baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison); + } + + // provide a path to a file containing a SQL query to use as a baseline + public BaselineQueryTestBuilder sqlBaselineQueryFromFile(String baselineQueryFilename) throws IOException { + String baselineQuery = BaseTestQuery.getFile(baselineQueryFilename); + return new BaselineQueryTestBuilder(baselineQuery, UserBitShared.QueryType.SQL, allocator, query, queryType, ordered, approximateEquality, + baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison); + } + + // as physical plans are verbose, this is the only option provided for specifying them, we should enforce + // that physical plans, or any large JSON strings do not live in the Java source as literals + public BaselineQueryTestBuilder physicalPlanBaselineQueryFromFile(String baselinePhysicalPlanPath) throws IOException { + String baselineQuery = BaseTestQuery.getFile(baselinePhysicalPlanPath); + return new BaselineQueryTestBuilder(baselineQuery, UserBitShared.QueryType.PHYSICAL, allocator, query, queryType, ordered, approximateEquality, + baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, highPerformanceComparison); + } + + private String getDecimalPrecisionScaleInfo(TypeProtos.MajorType type) { + String precision = ""; + switch(type.getMinorType()) { + case DECIMAL18: + case DECIMAL28SPARSE: + case DECIMAL38SPARSE: + case DECIMAL38DENSE: + case DECIMAL28DENSE: + case DECIMAL9: + precision = String.format("(%d,%d)", type.getScale(), type.getPrecision()); + break; + default: + ; // do nothing empty string set above + } + return precision; + } + + public class CSVTestBuilder extends TestBuilder { + + // path to the baseline file that will be inserted into the validation query + private String 
baselineFilePath; + // use to cast the baseline file columns, if not set the types + // that come out of the test query drive interpretation of baseline + private TypeProtos.MajorType[] baselineTypes; + + CSVTestBuilder(String baselineFile, BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered, + boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap, + String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) { + super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, + highPerformanceComparison); + this.baselineFilePath = baselineFile; + } + + public CSVTestBuilder baselineTypes(TypeProtos.MajorType... baselineTypes) { + this.baselineTypes = baselineTypes; + this.baselineTypeMap = null; + return this; + } + + // convenience method to convert minor types to major types if no decimals with precisions are needed + public CSVTestBuilder baselineTypes(TypeProtos.MinorType ... baselineTypes) { + TypeProtos.MajorType[] majorTypes = new TypeProtos.MajorType[baselineTypes.length]; + int i = 0; + for(TypeProtos.MinorType minorType : baselineTypes) { + majorTypes[i] = Types.required(minorType); + i++; + } + this.baselineTypes = majorTypes; + this.baselineTypeMap = null; + return this; + } + + protected TestBuilder reset() { + super.reset(); + baselineTypeMap = null; + baselineTypes = null; + baselineFilePath = null; + return this; + } + + boolean typeInfoSet() { + if (super.typeInfoSet() || baselineTypes != null) { + return true; + } else { + return false; + } + } + + String getValidationQuery() throws Exception { + if (baselineColumns.length == 0) { + throw new Exception("Baseline CSV files require passing column names, please call the baselineColumns() method on the test builder."); + } + + if (baselineTypes != null) { + assertEquals("Must pass the same number of types as column names if types are provided.", baselineTypes.length, baselineColumns.length); + } + + String[] aliasedExpectedColumns = new String[baselineColumns.length]; + for (int i = 0; i < baselineColumns.length; i++) { + aliasedExpectedColumns[i] = "columns[" + i + "] "; + TypeProtos.MajorType majorType; + if (baselineTypes != null) { + majorType = baselineTypes[i]; + } else if (baselineTypeMap != null) { + majorType = baselineTypeMap.get(parsePath(baselineColumns[i])); + } else { + throw new Exception("Type information not set for interpreting csv baseline file."); + } + String precision = getDecimalPrecisionScaleInfo(majorType); + // TODO - determine if there is a better behavior here, if we do not specify a length the default behavior is + // to cast to varchar with length 1 + // set default cast size for varchar, the cast function will take the lesser of this passed value and the + // length of the incoming data when choosing the length for the outgoing data + if (majorType.getMinorType() == TypeProtos.MinorType.VARCHAR || + majorType.getMinorType() == TypeProtos.MinorType.VARBINARY) { + precision = "(65000)"; + } + aliasedExpectedColumns[i] = "cast(" + aliasedExpectedColumns[i] + " as " + + Types.getNameOfMinorType(majorType.getMinorType()) + precision + " ) " + baselineColumns[i]; + } + String query = "select " + Joiner.on(", ").join(aliasedExpectedColumns) + " from cp.`" + baselineFilePath + "`"; + return query; + } + + protected UserBitShared.QueryType getValidationQueryType() throws Exception { + return UserBitShared.QueryType.SQL; 
+ } + + } + + public class JSONTestBuilder extends TestBuilder { + + // path to the baseline file that will be inserted into the validation query + private String baselineFilePath; + + JSONTestBuilder(String baselineFile, BufferAllocator allocator, String query, UserBitShared.QueryType queryType, Boolean ordered, + boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap, + String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) { + super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, + highPerformanceComparison); + this.baselineFilePath = baselineFile; + this.baselineColumns = new String[] {"*"}; + } + + String getValidationQuery() { + return "select " + Joiner.on(", ").join(baselineColumns) + " from cp.`" + baselineFilePath + "`"; + } + + protected UserBitShared.QueryType getValidationQueryType() throws Exception { + return UserBitShared.QueryType.SQL; + } + + } + + public class BaselineQueryTestBuilder extends TestBuilder { + + private String baselineQuery; + private UserBitShared.QueryType baselineQueryType; + + BaselineQueryTestBuilder(String baselineQuery, UserBitShared.QueryType baselineQueryType, BufferAllocator allocator, + String query, UserBitShared.QueryType queryType, Boolean ordered, + boolean approximateEquality, Map<SchemaPath, TypeProtos.MajorType> baselineTypeMap, + String baselineOptionSettingQueries, String testOptionSettingQueries, boolean highPerformanceComparison) { + super(allocator, query, queryType, ordered, approximateEquality, baselineTypeMap, baselineOptionSettingQueries, testOptionSettingQueries, + highPerformanceComparison); + this.baselineQuery = baselineQuery; + this.baselineQueryType = baselineQueryType; + } + + String getValidationQuery() { + return baselineQuery; + } + + protected UserBitShared.QueryType getValidationQueryType() throws Exception { + return baselineQueryType; + } + + // This currently assumes that all explicit baseline queries will have fully qualified type information + // if this changes, the baseline query can be run in a sub query with the implicit or explicit type passing + // added on top of it, as is currently when done when reading a baseline file + boolean typeInfoSet() { + return true; + } + + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestTestFramework.java b/exec/java-exec/src/test/java/org/apache/drill/TestTestFramework.java new file mode 100644 index 000000000..eaaae6b0f --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/TestTestFramework.java @@ -0,0 +1,397 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.apache.drill; + +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.types.MinorType; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.Types; +import org.apache.drill.exec.util.JsonStringArrayList; +import org.apache.drill.exec.util.JsonStringHashMap; +import org.junit.Ignore; +import org.junit.Test; + +import java.math.BigDecimal; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +// TODO - update framework to remove any dependency on the Drill engine for reading baseline result sets +// currently using it with the assumption that the csv and json readers are well tested, and handling diverse +// types in the test framework would require doing some redundant work to enable casting outside of Drill or +// some better tooling to generate parquet files that have all of the parquet types +public class TestTestFramework extends BaseTestQuery{ + + private static String CSV_COLS = " cast(columns[0] as bigint) employee_id, columns[1] as first_name, columns[2] as last_name "; + + @Test + public void testCSVVerification() throws Exception { + testBuilder() + .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`") + .ordered() + .csvBaselineFile("testframework/small_test_data.tsv") + .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR) + .baselineColumns("employee_id", "first_name", "last_name") + .build().run(); + } + + @Test + public void testBaselineValsVerification() throws Exception { + testBuilder() + .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json` limit 1") + .ordered() + .baselineColumns("employee_id", "first_name", "last_name") + .baselineValues(12l, "Jewel", "Creek") + .build().run(); + + testBuilder() + .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json` limit 1") + .unOrdered() + .baselineColumns("employee_id", "first_name", "last_name") + .baselineValues(12l, "Jewel", "Creek") + .build().run(); + } + + @Ignore("Drill 1737") + @Test + public void testDecimalBaseline() throws Exception { + // type information can be provided explicitly + testBuilder() + .sqlQuery("select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`") + .unOrdered() + .csvBaselineFile("testframework/decimal_test.tsv") + .baselineTypes(Types.withScaleAndPrecision(TypeProtos.MinorType.DECIMAL38SPARSE, TypeProtos.DataMode.REQUIRED, 38, 2)) + .baselineColumns("dec_col") + .build().run(); + + // TODO - re-enable once DRILL-1737 is fixed + // type information can also be left out, this will prompt the result types of the test query to drive the + // interpretation of the test file +// testBuilder() +// .sqlQuery("select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`") +// .unOrdered() +// .csvBaselineFile("testframework/decimal_test.tsv") +// .baselineColumns("dec_col") +// .build().run(); + + // Or you can provide explicit values to the builder itself to avoid going through the drill engine at all to + // populate the baseline results + testBuilder() + .sqlQuery("select cast(dec_col as decimal(38,2)) dec_col from cp.`testframework/decimal_test.json`") + .unOrdered() + .baselineColumns("dec_col") + .baselineValues(new BigDecimal("3.70")) + .build().run(); + + 
} + + @Test + public void testBaselineValsVerificationWithNulls() throws Exception { + testBuilder() + .sqlQuery("select * from cp.`store/json/json_simple_with_null.json`") + .ordered() + .baselineColumns("a", "b") + .baselineValues(5l, 10l) + .baselineValues(7l, null) + .baselineValues(null, null) + .baselineValues(9l, 11l) + .build().run(); + + testBuilder() + .sqlQuery("select * from cp.`store/json/json_simple_with_null.json`") + .unOrdered() + .baselineColumns("a", "b") + .baselineValues(5l, 10l) + .baselineValues(9l, 11l) + .baselineValues(7l, null) + .baselineValues(null, null) + .build().run(); + } + + @Test + public void testBaselineValsVerificationWithComplexAndNulls() throws Exception { + JsonStringArrayList list = new JsonStringArrayList(); + JsonStringArrayList innerList1 = new JsonStringArrayList(); + innerList1.add(2l); + innerList1.add(1l); + JsonStringArrayList innerList2 = new JsonStringArrayList(); + innerList2.add(4l); + innerList2.add(6l); + list.add(innerList1); + list.add(innerList2); + + JsonStringArrayList l_list = new JsonStringArrayList(); + l_list.add(4l); + l_list.add(2l); + + JsonStringHashMap x = new JsonStringHashMap(); + x.put("y", "kevin"); + x.put("z", "paul"); + + // [{"orange":"yellow","pink":"red"},{"pink":"purple"}] + JsonStringArrayList z = new JsonStringArrayList(); + JsonStringHashMap z_1 = new JsonStringHashMap(); + z_1.put("orange", "yellow"); + z_1.put("pink", "red"); + + JsonStringHashMap z_2 = new JsonStringHashMap(); + z_2.put("pink", "purple"); + z.add(z_1); + z.add(z_2); + + testBuilder() + .sqlQuery("select * from cp.`/jsoninput/input2.json` limit 1") + .ordered() + .baselineColumns("integer", "float", "x", "z", "l", "rl") + .baselineValues(2010l, 17.4, x, z, l_list, list) + .build().run(); + } + + @Test + public void testCSVVerification_missing_records_fails() throws Exception { + try { + testBuilder() + .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`") + .ordered() + .csvBaselineFile("testframework/small_test_data_extra.tsv") + .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR) + .baselineColumns("employee_id", "first_name", "last_name") + .build().run(); + } catch (AssertionError ex) { + assertEquals("Incorrect number of rows returned by query. expected:<7> but was:<5>", ex.getMessage()); + // this indicates successful completion of the test + return; + } + throw new Exception("Test framework verification failed, expected failure on missing records."); + } + + @Test + public void testCSVVerification_extra_records_fails() throws Exception { + try { + testBuilder() + .sqlQuery("select " + CSV_COLS + " from cp.`testframework/small_test_data_extra.tsv`") + .ordered() + .csvBaselineFile("testframework/small_test_data.tsv") + .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR) + .baselineColumns("employee_id", "first_name", "last_name") + .build().run(); + } catch (AssertionError ex) { + assertEquals("Incorrect number of rows returned by query. 
expected:<5> but was:<7>", ex.getMessage()); + // this indicates successful completion of the test + return; + } + throw new Exception("Test framework verification failed, expected failure for extra records."); + } + + @Test + public void testCSVVerification_extra_column_fails() throws Exception { + try { + testBuilder() + .sqlQuery("select " + CSV_COLS + ", columns[3] as address from cp.`testframework/small_test_data_extra_col.tsv`") + .ordered() + .csvBaselineFile("testframework/small_test_data.tsv") + .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR) + .baselineColumns("employee_id", "first_name", "last_name") + .build().run(); + } catch (AssertionError ex) { + assertEquals("Unexpected extra column `address` returned by query.", ex.getMessage()); + // this indicates successful completion of the test + return; + } + throw new Exception("Test framework verification failed, expected failure on extra column."); + } + + @Test + public void testCSVVerification_missing_column_fails() throws Exception { + try { + testBuilder() + .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`") + .ordered() + .csvBaselineFile("testframework/small_test_data_extra_col.tsv") + .baselineTypes(TypeProtos.MinorType.BIGINT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR) + .baselineColumns("employee_id", "first_name", "last_name", "address") + .build().run(); + } catch (Exception ex) { + assertEquals("Expected column(s) `address`, not found in result set.", ex.getMessage()); + // this indicates successful completion of the test + return; + } + throw new Exception("Test framework verification failed, expected failure on missing column."); + } + + @Test + public void testCSVVerificationOfTypes() throws Throwable { + try { + testBuilder() + .sqlQuery("select employee_id, first_name, last_name from cp.`testframework/small_test_data.json`") + .ordered() + .csvBaselineFile("testframework/small_test_data.tsv") + .baselineTypes(TypeProtos.MinorType.INT, TypeProtos.MinorType.VARCHAR, TypeProtos.MinorType.VARCHAR) + .baselineColumns("employee_id", "first_name", "last_name") + .build().run(); + } catch (Exception ex) { + assertEquals("at position 0 column '`employee_id`' mismatched values, expected: 12(Integer) but received 12(Long)", ex.getMessage()); + // this indicates successful completion of the test + return; + } + throw new Exception("Test framework verification failed, expected failure on type check."); + } + + @Test + public void testCSVVerificationOfOrder_checkFailure() throws Throwable { + try { + testBuilder() + .sqlQuery("select columns[0] as employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`") + .ordered() + .csvBaselineFile("testframework/small_test_data.tsv") + .baselineColumns("employee_id", "first_name", "last_name") + .build().run(); + } catch (Exception ex) { + assertEquals("at position 0 column '`first_name`' mismatched values, expected: Jewel(String) but received Peggy(String)", ex.getMessage()); + // this indicates successful completion of the test + return; + } + throw new Exception("Test framework verification failed, expected failure on order check."); + } + + @Test + public void testCSVVerificationOfUnorderedComparison() throws Throwable { + testBuilder() + .sqlQuery("select columns[0] as employee_id, columns[1] as first_name, columns[2] as last_name from 
cp.`testframework/small_test_data_reordered.tsv`")
+        .unOrdered()
+        .csvBaselineFile("testframework/small_test_data.tsv")
+        .baselineColumns("employee_id", "first_name", "last_name")
+        .build().run();
+  }
+
+  // TODO - enable more advanced type handling for JSON; currently only basic support works.
+  // Add support for type information taken from the test query, or for explicit type expectations.
+  @Test
+  public void testBasicJSON() throws Exception {
+    testBuilder()
+        .sqlQuery("select * from cp.`scan_json_test_3.json`")
+        .ordered()
+        .jsonBaselineFile("/scan_json_test_3.json")
+        .build().run();
+
+    testBuilder()
+        .sqlQuery("select * from cp.`scan_json_test_3.json`")
+        .unOrdered() // Check other verification method with same files
+        .jsonBaselineFile("/scan_json_test_3.json")
+        .build().run();
+  }
+
+  @Test
+  public void testComplexJSON_all_text() throws Exception {
+    testBuilder()
+        .sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
+        .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
+        .ordered()
+        .jsonBaselineFile("store/json/schema_change_int_to_string.json")
+        .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true")
+        .build().run();
+
+    testBuilder()
+        .sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
+        .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
+        .unOrdered() // Check other verification method with same files
+        .jsonBaselineFile("store/json/schema_change_int_to_string.json")
+        .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true")
+        .build().run();
+    test("alter system set `store.json.all_text_mode` = false");
+  }
+
+  @Test
+  public void testRepeatedColumnMatching() throws Exception {
+    try {
+      testBuilder()
+          .sqlQuery("select * from cp.`store/json/schema_change_int_to_string.json`")
+          .optionSettingQueriesForTestQuery("alter system set `store.json.all_text_mode` = true")
+          .ordered()
+          .jsonBaselineFile("testframework/schema_change_int_to_string_non-matching.json")
+          .optionSettingQueriesForBaseline("alter system set `store.json.all_text_mode` = true")
+          .build().run();
+    } catch (Exception ex) {
+      assertEquals("at position 1 column '`field_1`' mismatched values, " +
+          "expected: [\"5\",\"2\",\"3\",\"4\",\"1\",\"2\"](JsonStringArrayList) but received [\"5\"](JsonStringArrayList)",
+          ex.getMessage());
+      // this indicates successful completion of the test
+      return;
+    }
+    throw new Exception("Test framework verification failed, expected failure on repeated column comparison.");
+  }
+
+  @Test
+  public void testEmptyResultSet() throws Exception {
+    testBuilder()
+        .sqlQuery("select * from cp.`store/json/json_simple_with_null.json` where 1=0")
+        .expectsEmptyResultSet()
+        .build().run();
+    try {
+      testBuilder()
+          .sqlQuery("select * from cp.`store/json/json_simple_with_null.json`")
+          .expectsEmptyResultSet()
+          .build().run();
+    } catch (AssertionError ex) {
+      assertEquals("Different number of records returned expected:<4> but was:<0>", ex.getMessage());
+      // this indicates successful completion of the test
+      return;
+    }
+    throw new Exception("Test framework verification failed, expected failure on unexpected records.");
+  }
+
+  @Test
+  public void testCSVVerificationTypeMap() throws Throwable {
+    Map<SchemaPath, TypeProtos.MajorType> typeMap = new HashMap<>();
+    typeMap.put(TestBuilder.parsePath("first_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+    typeMap.put(TestBuilder.parsePath("employee_id"), Types.optional(TypeProtos.MinorType.INT));
+    typeMap.put(TestBuilder.parsePath("last_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+    testBuilder()
+        .sqlQuery("select cast(columns[0] as int) employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
+        .unOrdered()
+        .csvBaselineFile("testframework/small_test_data.tsv")
+        .baselineColumns("employee_id", "first_name", "last_name")
+        // This works without the call below because default casts are added based on the types that come out of
+        // the test query. To write a test that enforces strict typing, pass explicit type information: either a
+        // list of types alongside a CSV baseline, or, for any format, a map of types like the one constructed
+        // above, passed in through the call that is commented out below.
+        //.baselineTypes(typeMap)
+        .build().run();
+
+    typeMap.clear();
+    typeMap.put(TestBuilder.parsePath("first_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+    // employee_id is intentionally given the wrong type here, to verify that failures happen when expected
+    typeMap.put(TestBuilder.parsePath("employee_id"), Types.optional(TypeProtos.MinorType.VARCHAR));
+    typeMap.put(TestBuilder.parsePath("last_name"), Types.optional(TypeProtos.MinorType.VARCHAR));
+
+    try {
+      testBuilder()
+          .sqlQuery("select cast(columns[0] as int) employee_id, columns[1] as first_name, columns[2] as last_name from cp.`testframework/small_test_data_reordered.tsv`")
+          .unOrdered()
+          .csvBaselineFile("testframework/small_test_data.tsv")
+          .baselineColumns("employee_id", "first_name", "last_name")
+          .baselineTypes(typeMap)
+          .build().run();
+    } catch (Exception ex) {
+      // this indicates successful completion of the test
+      return;
+    }
+    throw new Exception("Test framework verification failed, expected failure on type check.");
+  }
+
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/HyperVectorValueIterator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/HyperVectorValueIterator.java
new file mode 100644
index 000000000..d214b7c68
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/HyperVectorValueIterator.java
@@ -0,0 +1,98 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.drill.exec;
+
+import org.apache.drill.exec.record.HyperVectorWrapper;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.ValueVector;
+
+import java.util.Iterator;
+
+public class HyperVectorValueIterator implements Iterator<Object> {
+  private MaterializedField mf;
+  private HyperVectorWrapper hyperVector;
+  private int indexInVectorList;
+  private int indexInCurrentVector;
+  private ValueVector currVec;
+  private long totalValues;
+  private long totalValuesRead;
+  // limit how many values will be read out of this iterator
+  private long recordLimit;
+
+  public HyperVectorValueIterator(MaterializedField mf, HyperVectorWrapper hyperVector) {
+    this.mf = mf;
+    this.hyperVector = hyperVector;
+    this.totalValues = 0;
+    this.indexInCurrentVector = 0;
+    this.indexInVectorList = 0;
+    this.recordLimit = -1;
+  }
+
+  public void setRecordLimit(long limit) {
+    this.recordLimit = limit;
+  }
+
+  public HyperVectorWrapper getHyperVector() {
+    return hyperVector;
+  }
+
+  public long getTotalRecords() {
+    if (recordLimit > 0) {
+      return recordLimit;
+    } else {
+      return totalValues;
+    }
+  }
+
+  public void determineTotalSize() {
+    for (ValueVector vv : hyperVector.getValueVectors()) {
+      this.totalValues += vv.getAccessor().getValueCount();
+    }
+  }
+
+  @Override
+  public boolean hasNext() {
+    if (totalValuesRead == recordLimit) {
+      return false;
+    }
+    if (indexInVectorList < hyperVector.getValueVectors().length) {
+      return true;
+    } else if (indexInCurrentVector < currVec.getAccessor().getValueCount()) {
+      return true;
+    }
+    return false;
+  }
+
+  @Override
+  public Object next() {
+    // advance to the next underlying vector once every value in the current one has been read;
+    // the accessor's value count must be used here rather than the vector's capacity, otherwise
+    // values past the end of the populated range would be returned
+    if (currVec == null || indexInCurrentVector == currVec.getAccessor().getValueCount()) {
+      currVec = hyperVector.getValueVectors()[indexInVectorList];
+      indexInVectorList++;
+      indexInCurrentVector = 0;
+    }
+    Object obj = currVec.getAccessor().getObject(indexInCurrentVector);
+    indexInCurrentVector++;
+    totalValuesRead++;
+    return obj;
+  }
+
+  @Override
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index ac5673db8..6f32847aa 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -30,6 +30,7 @@ import java.util.Map;
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.HyperVectorValueIterator;
 import org.apache.drill.exec.exception.SchemaChangeException;
 import org.apache.drill.exec.proto.UserBitShared;
 import org.apache.drill.exec.record.BatchSchema;
@@ -53,8 +54,6 @@ public class TestParquetWriter extends BaseTestQuery {
   static FileSystem fs;
 
-  private static final boolean VERBOSE_DEBUG = false;
-
   @BeforeClass
   public static void initFs() throws Exception {
     Configuration conf = new Configuration();
@@ -90,17 +89,9 @@ public class TestParquetWriter extends BaseTestQuery {
         "L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, cast(L_COMMITDATE as DATE) as COMMITDATE, cast(L_RECEIPTDATE as DATE) AS RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT";
     String validationSelection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, 
L_TAX, " + "L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE,COMMITDATE ,RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT"; + String inputTable = "cp.`tpch/lineitem.parquet`"; - String query = String.format("SELECT %s FROM %s", selection, inputTable); - List<QueryResultBatch> expected = testSqlWithResults(query); - BatchSchema schema = null; - RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); - List<Map> expectedRecords = new ArrayList<>(); - // read the data out of the results, the error manifested itself upon call of getObject on the vectors as they had contained deadbufs - addToMaterializedResults(expectedRecords, expected, loader, schema); - for (QueryResultBatch result : expected) { - result.release(); - } + runTestAndValidate(selection, validationSelection, inputTable, "drill_929"); } @Test @@ -184,7 +175,6 @@ public class TestParquetWriter extends BaseTestQuery { } @Test - @Ignore public void testRepeatedBool() throws Exception { String inputTable = "cp.`parquet/repeated_bool_data.json`"; runTestAndValidate("*", "*", inputTable, "repeated_bool_parquet"); @@ -208,11 +198,9 @@ public class TestParquetWriter extends BaseTestQuery { @Test public void testMulipleRowGroups() throws Exception { try { - //test(String.format("ALTER SESSION SET `%s` = %d", ExecConstants.PARQUET_BLOCK_SIZE, 1*1024*1024)); + test(String.format("ALTER SESSION SET `%s` = %d", ExecConstants.PARQUET_BLOCK_SIZE, 1*1024*1024)); String selection = "mi"; String inputTable = "cp.`customer.json`"; - int count = testRunAndPrint(UserBitShared.QueryType.SQL, "select mi from cp.`customer.json`"); - System.out.println(count); runTestAndValidate(selection, selection, inputTable, "foodmart_customer_parquet"); } finally { test(String.format("ALTER SESSION SET `%s` = %d", ExecConstants.PARQUET_BLOCK_SIZE, 512*1024*1024)); @@ -228,77 +216,29 @@ public class TestParquetWriter extends BaseTestQuery { runTestAndValidate(selection, validateSelection, inputTable, "foodmart_employee_parquet"); } - public void compareParquetReaders(String selection, String table) throws Exception { - test("alter system set `store.parquet.use_new_reader` = true"); - List<QueryResultBatch> expected = testSqlWithResults("select " + selection + " from " + table); - - RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); - BatchSchema schema = null; - - List<Map> expectedRecords = new ArrayList<>(); - addToMaterializedResults(expectedRecords, expected, loader, schema); - - test("alter system set `store.parquet.use_new_reader` = false"); - List<QueryResultBatch> results = testSqlWithResults("select " + selection + " from " + table); - - List<Map> actualRecords = new ArrayList<>(); - addToMaterializedResults(actualRecords, results, loader, schema); - compareResults(expectedRecords, actualRecords); - for (QueryResultBatch result : results) { - result.release(); - } - for (QueryResultBatch result : expected) { - result.release(); - } - } - public void compareParquetReadersColumnar(String selection, String table) throws Exception { - test("alter system set `store.parquet.use_new_reader` = true"); - List<QueryResultBatch> expected = testSqlWithResults("select " + selection + " from " + table); - - RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); - BatchSchema schema = null; - - Map<String, List> expectedSuperVectors = addToCombinedVectorResults(expected, loader, schema); - - test("alter system set `store.parquet.use_new_reader` = false"); - List<QueryResultBatch> results = testSqlWithResults("select " + selection + " from " + 
table); + String query = "select " + selection + " from " + table; + testBuilder() + .ordered() + .sqlQuery(query) + .optionSettingQueriesForTestQuery("alter system set `store.parquet.use_new_reader` = false") + .sqlBaselineQuery(query) + .optionSettingQueriesForBaseline("alter system set `store.parquet.use_new_reader` = true") + .build().run(); - Map<String, List> actualSuperVectors = addToCombinedVectorResults(results, loader, schema); - compareMergedVectors(expectedSuperVectors, actualSuperVectors); - for (QueryResultBatch result : results) { - result.release(); - } - for (QueryResultBatch result : expected) { - result.release(); - } } public void compareParquetReadersHyperVector(String selection, String table) throws Exception { - RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); - BatchSchema schema = null; - // TODO - It didn't seem to respect the max width per node setting, so I went in and modified the SimpleParalellizer directly. - // I backed out the changes after the test passed. -// test("alter system set `planner.width.max_per_node` = 1"); - test("alter system set `store.parquet.use_new_reader` = false"); String query = "select " + selection + " from " + table; - List<QueryResultBatch> results = testSqlWithResults(query); - - Map<String, HyperVectorValueIterator> actualSuperVectors = addToHyperVectorMap(results, loader, schema); - - test("alter system set `store.parquet.use_new_reader` = true"); - List<QueryResultBatch> expected = testSqlWithResults(query); - - Map<String, HyperVectorValueIterator> expectedSuperVectors = addToHyperVectorMap(expected, loader, schema); - - compareHyperVectors(expectedSuperVectors, actualSuperVectors); - for (QueryResultBatch result : results) { - result.release(); - } - for (QueryResultBatch result : expected) { - result.release(); - } + testBuilder() + .ordered() + .highPerformanceComparison() + .sqlQuery(query) + .optionSettingQueriesForTestQuery("alter system set `store.parquet.use_new_reader` = false") + .sqlBaselineQuery(query) + .optionSettingQueriesForBaseline("alter system set `store.parquet.use_new_reader` = true") + .build().run(); } @Ignore @@ -394,312 +334,10 @@ public class TestParquetWriter extends BaseTestQuery { String validateQuery = String.format("SELECT %s FROM " + outputFile, validationSelection); test(create); - RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); - BatchSchema schema = null; - - List<QueryResultBatch> expected = testSqlWithResults(query); - List<Map> expectedRecords = new ArrayList<>(); - addToMaterializedResults(expectedRecords, expected, loader, schema); - - List<QueryResultBatch> results = testSqlWithResults(validateQuery); - List<Map> actualRecords = new ArrayList<>(); - addToMaterializedResults(actualRecords, results, loader, schema); - - compareResults(expectedRecords, actualRecords); - for (QueryResultBatch result : results) { - result.release(); - } - for (QueryResultBatch result : expected) { - result.release(); - } - } - - public void compareHyperVectors(Map<String, HyperVectorValueIterator> expectedRecords, - Map<String, HyperVectorValueIterator> actualRecords) throws Exception { - for (String s : expectedRecords.keySet()) { - assertEquals(expectedRecords.get(s).getTotalRecords(), actualRecords.get(s).getTotalRecords()); - HyperVectorValueIterator expectedValues = expectedRecords.get(s); - HyperVectorValueIterator actualValues = actualRecords.get(s); - int i = 0; - while (expectedValues.hasNext()) { - compareValues(expectedValues.next(), actualValues.next(), i, s); - 
i++; - } - } - for (HyperVectorValueIterator hvi : expectedRecords.values()) { - for (ValueVector vv : hvi.hyperVector.getValueVectors()) { - vv.clear(); - } - } - for (HyperVectorValueIterator hvi : actualRecords.values()) { - for (ValueVector vv : hvi.hyperVector.getValueVectors()) { - vv.clear(); - } - } - } - - public void compareMergedVectors(Map<String, List> expectedRecords, Map<String, List> actualRecords) throws Exception { - for (String s : expectedRecords.keySet()) { - assertEquals(expectedRecords.get(s).size(), actualRecords.get(s).size()); - List expectedValues = expectedRecords.get(s); - List actualValues = actualRecords.get(s); - for (int i = 0; i < expectedValues.size(); i++) { - compareValues(expectedValues.get(i), actualValues.get(i), i, s); - } - } - } - - public Map<String, HyperVectorValueIterator> addToHyperVectorMap(List<QueryResultBatch> records, RecordBatchLoader loader, - BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException { - // TODO - this does not handle schema changes - Map<String, HyperVectorValueIterator> combinedVectors = new HashMap(); - - long totalRecords = 0; - QueryResultBatch batch; - int size = records.size(); - for (int i = 0; i < size; i++) { - batch = records.get(i); - loader = new RecordBatchLoader(getAllocator()); - loader.load(batch.getHeader().getDef(), batch.getData()); - logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords); - totalRecords += loader.getRecordCount(); - for (VectorWrapper w : loader) { - String field = w.getField().toExpr(); - if ( ! combinedVectors.containsKey(field)) { - MaterializedField mf = w.getField(); - ValueVector[] vvList = (ValueVector[]) Array.newInstance(mf.getValueClass(), 1); - vvList[0] = w.getValueVector(); - combinedVectors.put(mf.getPath().toExpr(), new HyperVectorValueIterator(mf, new HyperVectorWrapper(mf, - vvList))); - } else { - combinedVectors.get(field).hyperVector.addVector(w.getValueVector()); - } - - } - } - for (HyperVectorValueIterator hvi : combinedVectors.values()) { - hvi.determineTotalSize(); - } - return combinedVectors; - } - - public Map<String, List> addToCombinedVectorResults(List<QueryResultBatch> records, RecordBatchLoader loader, - BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException { - // TODO - this does not handle schema changes - Map<String, List> combinedVectors = new HashMap(); - - long totalRecords = 0; - QueryResultBatch batch; - int size = records.size(); - for (int i = 0; i < size; i++) { - batch = records.get(0); - loader.load(batch.getHeader().getDef(), batch.getData()); - if (schema == null) { - schema = loader.getSchema(); - for (MaterializedField mf : schema) { - combinedVectors.put(mf.getPath().toExpr(), new ArrayList()); - } - } - logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords); - totalRecords += loader.getRecordCount(); - for (VectorWrapper w : loader) { - String field = w.getField().toExpr(); - for (int j = 0; j < loader.getRecordCount(); j++) { - if (totalRecords - loader.getRecordCount() + j > 5000000) { - continue; - } - Object obj = w.getValueVector().getAccessor().getObject(j); - if (obj != null) { - if (obj instanceof Text) { - obj = obj.toString(); - if (obj.equals("")) { - System.out.println(w.getField()); - } - } - else if (obj instanceof byte[]) { - obj = new String((byte[]) obj, "UTF-8"); - } - } - combinedVectors.get(field).add(obj); - } - } - records.remove(0); - batch.release(); - 
loader.clear(); - } - return combinedVectors; - } - - public static class HyperVectorValueIterator implements Iterator<Object>{ - private MaterializedField mf; - HyperVectorWrapper hyperVector; - private int indexInVectorList; - private int indexInCurrentVector; - private ValueVector currVec; - private long totalValues; - private long totalValuesRead; - // limit how many values will be read out of this iterator - private long recordLimit; - - public HyperVectorValueIterator(MaterializedField mf, HyperVectorWrapper hyperVector) { - this.mf = mf; - this.hyperVector = hyperVector; - this.totalValues = 0; - this.indexInCurrentVector = 0; - this.indexInVectorList = 0; - this.recordLimit = -1; - } - - public void setRecordLimit(long limit) { - this.recordLimit = limit; - } - - public long getTotalRecords() { - if (recordLimit > 0) { - return recordLimit; - } else { - return totalValues; - } - } - - public void determineTotalSize() { - for (ValueVector vv : hyperVector.getValueVectors()) { - this.totalValues += vv.getAccessor().getValueCount(); - } - } - - @Override - public boolean hasNext() { - if (totalValuesRead == recordLimit) { - return false; - } - if (indexInVectorList < hyperVector.getValueVectors().length) { - return true; - } else if ( indexInCurrentVector < currVec.getAccessor().getValueCount()) { - return true; - } - return false; - } - - @Override - public Object next() { - if (currVec == null || indexInCurrentVector == currVec.getValueCapacity()) { - currVec = hyperVector.getValueVectors()[indexInVectorList]; - indexInVectorList++; - indexInCurrentVector = 0; - } - Object obj = currVec.getAccessor().getObject(indexInCurrentVector); - indexInCurrentVector++; - totalValuesRead++; - return obj; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - public void addToMaterializedResults(List<Map> materializedRecords, List<QueryResultBatch> records, RecordBatchLoader loader, - BatchSchema schema) throws SchemaChangeException, UnsupportedEncodingException { - long totalRecords = 0; - QueryResultBatch batch; - int size = records.size(); - for (int i = 0; i < size; i++) { - batch = records.get(0); - loader.load(batch.getHeader().getDef(), batch.getData()); - if (schema == null) { - schema = loader.getSchema(); - } - logger.debug("reading batch with " + loader.getRecordCount() + " rows, total read so far " + totalRecords); - totalRecords += loader.getRecordCount(); - for (int j = 0; j < loader.getRecordCount(); j++) { - HashMap<String, Object> record = new HashMap<>(); - for (VectorWrapper w : loader) { - Object obj = w.getValueVector().getAccessor().getObject(j); - if (obj != null) { - if (obj instanceof Text) { - obj = obj.toString(); - if (obj.equals("")) { - System.out.println(w.getField()); - } - } - else if (obj instanceof byte[]) { - obj = new String((byte[]) obj, "UTF-8"); - } - record.put(w.getField().toExpr(), obj); - } - record.put(w.getField().toExpr(), obj); - } - materializedRecords.add(record); - } - records.remove(0); - batch.release(); - loader.clear(); - } - } - - public void compareValues(Object expected, Object actual, int counter, String column) throws Exception { - - if (expected == null) { - if (actual == null) { - if (VERBOSE_DEBUG) { - logger.debug("(1) at position " + counter + " column '" + column + "' matched value: " + expected ); - } - return; - } else { - throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: " + expected + " but received " + actual); - } - } - if 
(actual == null) {
-      throw new Exception("unexpected null at position " + counter + " column '" + column + "' should have been: " + expected);
-    }
-    if (actual instanceof byte[]) {
-      if ( ! Arrays.equals((byte[]) expected, (byte[]) actual)) {
-        throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: "
-          + new String((byte[])expected, "UTF-8") + " but received " + new String((byte[])actual, "UTF-8"));
-      } else {
-        if (VERBOSE_DEBUG) {
-          logger.debug("at position " + counter + " column '" + column + "' matched value " + new String((byte[])expected, "UTF-8"));
-        }
-        return;
-      }
-    }
-    if (!expected.equals(actual)) {
-      throw new Exception("at position " + counter + " column '" + column + "' mismatched values, expected: " + expected + " but received " + actual);
-    } else {
-      if (VERBOSE_DEBUG) {
-        logger.debug("at position " + counter + " column '" + column + "' matched value: " + expected );
-      }
-    }
-  }
-
-  public void compareResults(List<Map> expectedRecords, List<Map> actualRecords) throws Exception {
-    Assert.assertEquals("Different number of records returned", expectedRecords.size(), actualRecords.size());
-
-    StringBuilder missing = new StringBuilder();
-    int i = 0;
-    int counter = 0;
-    int missmatch;
-    for (Map<String, Object> record : expectedRecords) {
-      missmatch = 0;
-      for (String column : record.keySet()) {
-        compareValues(record.get(column), actualRecords.get(i).get(column), counter, column );
-      }
-      if ( !actualRecords.get(i).equals(record)) {
-        System.out.println("mismatch at position " + counter );
-        missing.append(missmatch);
-        missing.append(",");
-      }
-
-      counter++;
-      if (counter % 100000 == 0 ) {
-        System.out.println("checked so far:" + counter);
-      }
-      i++;
-    }
-    logger.debug(missing.toString());
-    System.out.println(missing);
+    testBuilder()
+        .unOrdered()
+        .sqlQuery(query)
+        .sqlBaselineQuery(validateQuery)
+        .build().run();
   }
 }
diff --git a/exec/java-exec/src/test/resources/sort/testSelectWithLimitOffset.tsv b/exec/java-exec/src/test/resources/sort/testSelectWithLimitOffset.tsv
new file mode 100644
index 000000000..eb490fe1b
--- /dev/null
+++ b/exec/java-exec/src/test/resources/sort/testSelectWithLimitOffset.tsv
@@ -0,0 +1,5 @@
+12 Jewel Creek
+13 Peggy Medina
+14 Bryan Rutledge
+15 Walter Cavestany
+16 Peggy Planck
\ No newline at end of file diff --git a/exec/java-exec/src/test/resources/store/json/json_simple_with_null.json b/exec/java-exec/src/test/resources/store/json/json_simple_with_null.json new file mode 100644 index 000000000..521727c02 --- /dev/null +++ b/exec/java-exec/src/test/resources/store/json/json_simple_with_null.json @@ -0,0 +1,16 @@ +{ + "a" : 5, + "b" : 10 +} +{ + "a" : 7, + "b" : null +} +{ + "a" : null, + "b" : null +} +{ + "a" : 9, + "b" : 11 +} diff --git a/exec/java-exec/src/test/resources/testframework/decimal_test.json b/exec/java-exec/src/test/resources/testframework/decimal_test.json new file mode 100644 index 000000000..36996f3c0 --- /dev/null +++ b/exec/java-exec/src/test/resources/testframework/decimal_test.json @@ -0,0 +1,3 @@ +{ +"dec_col" : "3.7" +}
\ No newline at end of file diff --git a/exec/java-exec/src/test/resources/testframework/decimal_test.tsv b/exec/java-exec/src/test/resources/testframework/decimal_test.tsv new file mode 100644 index 000000000..548d71365 --- /dev/null +++ b/exec/java-exec/src/test/resources/testframework/decimal_test.tsv @@ -0,0 +1 @@ +3.7
\ No newline at end of file diff --git a/exec/java-exec/src/test/resources/testframework/schema_change_int_to_string_non-matching.json b/exec/java-exec/src/test/resources/testframework/schema_change_int_to_string_non-matching.json new file mode 100644 index 000000000..3de294c3a --- /dev/null +++ b/exec/java-exec/src/test/resources/testframework/schema_change_int_to_string_non-matching.json @@ -0,0 +1,30 @@ +{ + "field_1": [1] +} +{ + "field_1": [5,2,3,4,1,2], + "field_2": 2, + "field_3": { + "inner_1" : 2 + }, + "field_4" : { + "inner_1" : [1,2,3,2,3,4,2,3], + "inner_2" : 3, + "inner_3" : { "inner_object_field_1" : 2} + }, + "field_5" : [ { "inner_list" : [1, null, 6] }, { "inner_list":[3,8]}, { "inner_list":[12, null, 4, "null", 5]} ] +} +{ + "field_1": [5], + "field_2": "A wild string appears!", + "field_3": { + "inner_1" : 5, + "inner_2" : 3, + "inner_3" : [ { "inner_object_field_1" : null}, {"inner_object_field_1" : 10} ] + }, + "field_4" : { + "inner_1" : [4,5,6], + "inner_2" : 3 + }, + "field_5" : [ { "inner_list" : [5, null, 6.0, "1234"] }, { "inner_list":[7,8.0, "12341324"], "inner_list_2" : [1,2,2323.443e10, "hello there"]}, { "inner_list":[3,4,5], "inner_list_2" : [10, 11, 12]} ] +}
\ No newline at end of file diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data.json b/exec/java-exec/src/test/resources/testframework/small_test_data.json new file mode 100644 index 000000000..7f08d256b --- /dev/null +++ b/exec/java-exec/src/test/resources/testframework/small_test_data.json @@ -0,0 +1,5 @@ +{"employee_id" : 12, "first_name" : "Jewel", "last_name" :"Creek" } +{"employee_id" : 13, "first_name" : "Peggy" , "last_name" :"Medina" } +{"employee_id" : 14, "first_name" : "Bryan" , "last_name" :"Rutledge" } +{"employee_id" : 15, "first_name" : "Walter" , "last_name" :"Cavestany" } +{"employee_id" : 16, "first_name" : "Peggy" , "last_name" :"Planck" } diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data.tsv new file mode 100644 index 000000000..1e0b4de33 --- /dev/null +++ b/exec/java-exec/src/test/resources/testframework/small_test_data.tsv @@ -0,0 +1,5 @@ +12 Jewel Creek +13 Peggy Medina +14 Bryan Rutledge +15 Walter Cavestany +16 Peggy Planck diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_extra.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_extra.tsv new file mode 100644 index 000000000..40e0f26bc --- /dev/null +++ b/exec/java-exec/src/test/resources/testframework/small_test_data_extra.tsv @@ -0,0 +1,7 @@ +12 Jewel Creek +13 Peggy Medina +14 Bryan Rutledge +15 Walter Cavestany +16 Peggy Planck +15 asdf asdklj +15 qwerty werjhtdl diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_extra_col.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_extra_col.tsv new file mode 100644 index 000000000..8465a14c6 --- /dev/null +++ b/exec/java-exec/src/test/resources/testframework/small_test_data_extra_col.tsv @@ -0,0 +1,5 @@ +12 Jewel Creek 123 fake st. +13 Peggy Medina 34 Electric Ave. +14 Bryan Rutledge 6 Sesame St. +15 Walter Cavestany 8 Wanye Ct., Gotham +16 Peggy Planck 5 Ran out of creative street names Blvd. 
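The reordered baseline files below exist to exercise the unOrdered() comparison mode used throughout TestTestFramework above. As a rough sketch of the semantics these files verify (this is not the framework's actual implementation, and the class and method names here are illustrative only), an unordered comparison can treat each result set as a multiset of column-name-to-value maps:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class UnorderedComparisonSketch {
  // Treats both result sets as multisets of rows: each expected row must be
  // matched by exactly one remaining actual row, so duplicate rows have to
  // occur the same number of times on both sides.
  public static boolean rowsMatchUnordered(List<Map<String, Object>> expected,
                                           List<Map<String, Object>> actual) {
    if (expected.size() != actual.size()) {
      return false;
    }
    List<Map<String, Object>> remaining = new ArrayList<>(actual);
    for (Map<String, Object> row : expected) {
      // Map.equals is insensitive to column order and handles null values,
      // matching the null handling exercised by json_simple_with_null.json
      if (!remaining.remove(row)) {
        return false;
      }
    }
    return remaining.isEmpty();
  }
}

Multiset semantics are what make the duplicate rows in the *_extra.tsv baselines count as mismatches instead of being absorbed, as a set-based comparison would do.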
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_reordered.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered.tsv
new file mode 100644
index 000000000..b06bbddb6
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered.tsv
@@ -0,0 +1,5 @@
+16 Peggy Planck
+13 Peggy Medina
+14 Bryan Rutledge
+12 Jewel Creek
+15 Walter Cavestany
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_extra.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_extra.tsv
new file mode 100644
index 000000000..fe4a52408
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_extra.tsv
@@ -0,0 +1,7 @@
+16 Peggy Planck
+13 Peggy Medina
+14 Bryan Rutledge
+12 Jewel Creek
+15 Walter Cavestany
+15 asdf asdklj
+15 qwerty werjhtdl
diff --git a/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_fewer.tsv b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_fewer.tsv
new file mode 100644
index 000000000..d7868b0fe
--- /dev/null
+++ b/exec/java-exec/src/test/resources/testframework/small_test_data_reordered_non_match_fewer.tsv
@@ -0,0 +1,3 @@
+16 Peggy Planck
+13 Peggy Medina
+15 Walter Cavestany
@@ -173,6 +173,7 @@
 <exclude>**/*.sql</exclude>
 <exclude>**/git.properties</exclude>
 <exclude>**/*.csv</exclude>
+<exclude>**/*.tsv</exclude>
 <exclude>**/*.txt</exclude>
 <exclude>**/drill-*.conf</exclude>
 <exclude>**/.buildpath</exclude>
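For reference, a minimal usage sketch of the HyperVectorValueIterator added above, following the pattern of addToHyperVectorMap in TestParquetWriter. The wrapper and field are assumed to have already been assembled from query result batches, and the surrounding class and method are illustrative only; the iterator calls themselves (determineTotalSize, hasNext, next) are those defined in the new class:

import org.apache.drill.exec.HyperVectorValueIterator;
import org.apache.drill.exec.record.HyperVectorWrapper;
import org.apache.drill.exec.record.MaterializedField;

public class HyperVectorIterationSketch {
  // Walks every value of one column across all batches of a query result,
  // given a hyper vector assembled from the per-batch value vectors.
  public static long countNonNullValues(MaterializedField field, HyperVectorWrapper wrapper) {
    HyperVectorValueIterator iter = new HyperVectorValueIterator(field, wrapper);
    iter.determineTotalSize(); // populates the total count reported by getTotalRecords()
    long nonNull = 0;
    while (iter.hasNext()) {
      if (iter.next() != null) {
        nonNull++;
      }
    }
    return nonNull;
  }
}

This flattened, value-at-a-time view is what lets the highPerformanceComparison() path in compareParquetReadersHyperVector compare two result sets column by column without first materializing every row into a map.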