From f2fd822afd8acc5aaa99dd37fb2f798bbec54a00 Mon Sep 17 00:00:00 2001 From: ozinoviev Date: Tue, 28 Aug 2018 20:29:55 +0300 Subject: DRILL-6717: lower and upper functions do not work with national characters closes #1450 --- .../exec/expr/fn/impl/StringFunctionHelpers.java | 20 ++++++------- .../drill/exec/expr/fn/impl/StringFunctions.java | 35 +++++++++++----------- .../exec/expr/fn/impl/TestStringFunctions.java | 2 -- 3 files changed, 27 insertions(+), 30 deletions(-) (limited to 'exec') diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java index 677446adc..cd1818068 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java @@ -148,24 +148,22 @@ public class StringFunctionHelpers { * Capitalizes first letter in each word. * Any symbol except digits and letters is considered as word delimiter. * - * @param start start position in input buffer - * @param end end position in input buffer - * @param inBuf buffer with input characters - * @param outBuf buffer with output characters + * @param source input characters */ - public static void initCap(int start, int end, DrillBuf inBuf, DrillBuf outBuf) { + public static String initCap(String source) { boolean capitalizeNext = true; - int out = 0; - for (int id = start; id < end; id++, out++) { - int currentByte = inBuf.getByte(id); - if (Character.isLetterOrDigit(currentByte)) { - currentByte = capitalizeNext ? Character.toUpperCase(currentByte) : Character.toLowerCase(currentByte); + StringBuilder str = new StringBuilder(source); + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (Character.isLetterOrDigit(c)) { + str.setCharAt(i, capitalizeNext ? 
Character.toUpperCase(c) : Character.toLowerCase(c)); capitalizeNext = false; } else { capitalizeNext = true; } - outBuf.setByte(out, currentByte); } + + return str.toString(); } /** diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java index a77faee14..6353e55f5 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java @@ -491,14 +491,13 @@ public class StringFunctions{ @Override public void eval() { - out.buffer = buffer = buffer.reallocIfNeeded(input.end- input.start); - out.start = 0; - out.end = input.end - input.start; + String str = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + byte[] result = str.toLowerCase().getBytes(com.google.common.base.Charsets.UTF_8); - for (int id = input.start; id < input.end; id++) { - byte currentByte = input.buffer.getByte(id); - out.buffer.setByte(id - input.start, Character.toLowerCase(currentByte)); - } + out.buffer = buffer = buffer.reallocIfNeeded(result.length); + out.start = 0; + out.end = result.length; + out.buffer.setBytes(0, result); } } @@ -522,14 +521,13 @@ public class StringFunctions{ @Override public void eval() { - out.buffer = buffer = buffer.reallocIfNeeded(input.end- input.start); - out.start = 0; - out.end = input.end - input.start; + String str = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + byte[] result = str.toUpperCase().getBytes(com.google.common.base.Charsets.UTF_8); - for (int id = input.start; id < input.end; id++) { - byte currentByte = input.buffer.getByte(id); - out.buffer.setByte(id - input.start, Character.toUpperCase(currentByte)); - } + out.buffer = buffer = buffer.reallocIfNeeded(result.length); + 
out.start = 0; + out.end = result.length; + out.buffer.setBytes(0, result); } } @@ -785,10 +783,13 @@ public class StringFunctions{ @Override public void eval() { - out.buffer = buffer = buffer.reallocIfNeeded(input.end - input.start); + String source = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + String result = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.initCap(source); + byte[] bytes = result.getBytes(com.google.common.base.Charsets.UTF_8); + out.buffer = buffer = buffer.reallocIfNeeded(bytes.length); out.start = 0; - out.end = input.end - input.start; - org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.initCap(input.start, input.end, input.buffer, out.buffer); + out.end = bytes.length; + out.buffer.setBytes(0, bytes); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java index 8158c3950..0f79daacc 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java @@ -23,7 +23,6 @@ import org.apache.drill.categories.UnlikelyTest; import org.apache.drill.test.BaseTestQuery; import org.apache.drill.categories.SqlFunctionTest; import org.apache.drill.exec.util.Text; -import org.junit.Ignore; import org.junit.Test; import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList; @@ -1494,7 +1493,6 @@ public class TestStringFunctions extends BaseTestQuery { .run(); } - @Ignore("DRILL-5477") @Test public void testMultiByteEncoding() throws Exception { testBuilder() -- cgit v1.2.3