aboutsummaryrefslogtreecommitdiff
path: root/test/sun/nio
diff options
context:
space:
mode:
author: sherman <none@none> 2011-05-02 11:42:52 -0700
committer: sherman <none@none> 2011-05-02 11:42:52 -0700
commit: 7b333f5281700ff39fffb248f5b61ea3116e7f38 (patch)
tree: 8de22bcf3c59ba9bc850fca40d01cc0a30495027 /test/sun/nio
parent: e344ad61532ac700a5db6ab9c1ca832891c28cf9 (diff)
7040220: java/char_encoding: Optimize UTF-8 charset for String.getBytes()/new String(byte[])
Summary: implement sun.nio.cs.ArrayEncoder/ArrayDecoder in UTF-8. Reviewed-by: alanb
Diffstat (limited to 'test/sun/nio')
-rw-r--r-- test/sun/nio/cs/StrCodingBenchmarkUTF8.java 92
-rw-r--r-- test/sun/nio/cs/TestStringCoding.java 4
-rw-r--r-- test/sun/nio/cs/TestStringCodingUTF8.java 166
-rw-r--r-- test/sun/nio/cs/TestUTF8.java 53
4 files changed, 310 insertions, 5 deletions
diff --git a/test/sun/nio/cs/StrCodingBenchmarkUTF8.java b/test/sun/nio/cs/StrCodingBenchmarkUTF8.java
new file mode 100644
index 000000000..eecc6ec2d
--- /dev/null
+++ b/test/sun/nio/cs/StrCodingBenchmarkUTF8.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class StrCodingBenchmarkUTF8 {
+
+ public static void main(String[] args) throws Throwable {
+
+ final int itrs = Integer.getInteger("iterations", 100000);
+ final int size = 2048;
+ final int subsize = Integer.getInteger("subsize", 128);
+ final Random rnd = new Random();
+ final int maxchar = 0x7f;
+
+ Charset charset = Charset.forName("UTF-8");
+ final String csn = charset.name();
+ final Charset cs = charset;
+
+ int[] starts = new int[] { 0, 0x80, 0x800, 0x10000};
+ for (int nb = 1; nb <= 4; nb++) {
+
+ final CharsetEncoder enc = cs.newEncoder();
+
+ char[] cc = new char[size];
+ int i = 0;
+ while (i < size - 3) {
+ i += Character.toChars(starts[nb - 1] + rnd.nextInt(maxchar), cc, i);
+ }
+
+ final String string = new String(cc);
+ final byte[] bytes = string.getBytes(cs);
+
+ System.out.printf("%n--------%s[nb=%d]---------%n", csn, nb);
+ int sz = 12;
+ while (sz < size) {
+ System.out.printf(" [len=%d]%n", sz);
+ final byte[] bs = Arrays.copyOf(bytes, sz);
+ final String str = new String(bs, csn);
+ StrCodingBenchmark.Job[] jobs = {
+ new StrCodingBenchmark.Job("String decode: csn") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ new String(bs, csn);
+ }},
+
+ new StrCodingBenchmark.Job("String decode: cs") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ new String(bs, cs);
+ }},
+
+ new StrCodingBenchmark.Job("String encode: csn") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ str.getBytes(csn);
+ }},
+
+ new StrCodingBenchmark.Job("String encode: cs") {
+ public void work() throws Throwable {
+ for (int i = 0; i < itrs; i++)
+ str.getBytes(cs);
+ }},
+ };
+ StrCodingBenchmark.time(StrCodingBenchmark.filter(null, jobs));
+ sz <<= 1;
+ }
+ }
+ }
+}
diff --git a/test/sun/nio/cs/TestStringCoding.java b/test/sun/nio/cs/TestStringCoding.java
index 4ab707310..c4837e956 100644
--- a/test/sun/nio/cs/TestStringCoding.java
+++ b/test/sun/nio/cs/TestStringCoding.java
@@ -24,7 +24,7 @@
*/
/* @test
- @bug 6636323 6636319
+ @bug 6636323 6636319 7040220
@summary Test if StringCoding and NIO result have the same de/encoding result
* @run main/othervm/timeout=2000 TestStringCoding
*/
@@ -111,6 +111,8 @@ public class TestStringCoding {
//encode unmappable surrogates
if (enc instanceof sun.nio.cs.ArrayEncoder &&
cs.contains(Charset.forName("ASCII"))) {
+ if (cs.name().equals("UTF-8")) // utf8 handles surrogates
+ return;
enc.replaceWith(new byte[] { (byte)'A'});
sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
diff --git a/test/sun/nio/cs/TestStringCodingUTF8.java b/test/sun/nio/cs/TestStringCodingUTF8.java
new file mode 100644
index 000000000..fdc204849
--- /dev/null
+++ b/test/sun/nio/cs/TestStringCodingUTF8.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test
+ @bug 7040220
+ @summary Test if StringCoding and NIO result have the same de/encoding result for UTF-8
+ * @run main/othervm/timeout=2000 TestStringCodingUTF8
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class TestStringCodingUTF8 {
+ public static void main(String[] args) throws Throwable {
+ test();
+ // security manager on
+ System.setSecurityManager(new PermissiveSecurityManger());
+ test();
+ }
+
+ static void test() throws Throwable {
+ Charset cs = Charset.forName("UTF-8");
+ char[] bmp = new char[0x10000];
+ for (int i = 0; i < 0x10000; i++) {
+ bmp[i] = (char)i;
+ }
+ test(cs, bmp, 0, bmp.length);
+
+ ArrayList<Integer> list = new ArrayList<>(0x20000);
+ for (int i = 0; i < 0x20000; i++) {
+ list.add(i, i);
+ }
+ Collections.shuffle(list);
+ int j = 0;
+ char[] bmpsupp = new char[0x30000];
+ for (int i = 0; i < 0x20000; i++) {
+ j += Character.toChars(list.get(i), bmpsupp, j);
+ }
+ assert (j == bmpsupp.length);
+ test(cs, bmpsupp, 0, bmpsupp.length);
+
+ // random "off" and "len" into the shuffled data
+ Random rnd = new Random();
+ int maxlen = 1000;
+ int itr = 5000;
+ for (int i = 0; i < itr; i++) {
+ int off = rnd.nextInt(bmpsupp.length - maxlen);
+ int len = rnd.nextInt(maxlen);
+ test(cs, bmpsupp, off, len);
+ }
+
+ // random-length arrays of random bytes, to exercise edge/corner cases
+ for (int i = 0; i < itr; i++) {
+ byte[] ba = new byte[rnd.nextInt(maxlen)];
+ rnd.nextBytes(ba);
+ //new String(csn);
+ if (!new String(ba, cs.name()).equals(
+ new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(csn) failed");
+ //new String(cs);
+ if (!new String(ba, cs).equals(
+ new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(cs) failed");
+ }
+ System.out.println("done!");
+ }
+
+ static void test(Charset cs, char[] ca, int off, int len) throws Throwable {
+ String str = new String(ca, off, len);
+ byte[] ba = encode(cs, ca, off, len);
+
+ //getBytes(csn);
+ byte[] baStr = str.getBytes(cs.name());
+ if (!Arrays.equals(ba, baStr))
+ throw new RuntimeException("getBytes(csn) failed");
+
+ //getBytes(cs);
+ baStr = str.getBytes(cs);
+ if (!Arrays.equals(ba, baStr))
+ throw new RuntimeException("getBytes(cs) failed");
+
+ //new String(csn);
+ if (!new String(ba, cs.name()).equals(new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(csn) failed");
+
+ //new String(cs);
+ if (!new String(ba, cs).equals(new String(decode(cs, ba, 0, ba.length))))
+ throw new RuntimeException("new String(cs) failed");
+ }
+
+ // copy/paste of the StringCoding.decode()
+ static char[] decode(Charset cs, byte[] ba, int off, int len) {
+ CharsetDecoder cd = cs.newDecoder();
+ int en = (int)(len * cd.maxCharsPerByte());
+ char[] ca = new char[en];
+ if (len == 0)
+ return ca;
+ cd.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
+
+ ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+ CharBuffer cb = CharBuffer.wrap(ca);
+ try {
+ CoderResult cr = cd.decode(bb, cb, true);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ cr = cd.flush(cb);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ } catch (CharacterCodingException x) {
+ throw new Error(x);
+ }
+ return Arrays.copyOf(ca, cb.position());
+ }
+
+ // copy/paste of the StringCoding.encode()
+ static byte[] encode(Charset cs, char[] ca, int off, int len) {
+ CharsetEncoder ce = cs.newEncoder();
+ int en = (int)(len * ce.maxBytesPerChar());
+ byte[] ba = new byte[en];
+ if (len == 0)
+ return ba;
+ ce.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
+ ByteBuffer bb = ByteBuffer.wrap(ba);
+ CharBuffer cb = CharBuffer.wrap(ca, off, len);
+ try {
+ CoderResult cr = ce.encode(cb, bb, true);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ cr = ce.flush(bb);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ } catch (CharacterCodingException x) {
+ throw new Error(x);
+ }
+ return Arrays.copyOf(ba, bb.position());
+ }
+
+ static class PermissiveSecurityManger extends SecurityManager {
+ @Override public void checkPermission(java.security.Permission p) {}
+ }
+}
diff --git a/test/sun/nio/cs/TestUTF8.java b/test/sun/nio/cs/TestUTF8.java
index 34a999fb2..f339eae04 100644
--- a/test/sun/nio/cs/TestUTF8.java
+++ b/test/sun/nio/cs/TestUTF8.java
@@ -23,7 +23,7 @@
/*
* @test
- * @bug 4486841
+ * @bug 4486841 7040220
* @summary Test UTF-8 charset
*/
@@ -70,6 +70,32 @@ public class TestUTF8 {
return dec.decode(bbf, cbf, true);
}
+ // copy/paste of the StringCoding.decode()
+ static char[] decode(Charset cs, byte[] ba, int off, int len) {
+ CharsetDecoder cd = cs.newDecoder();
+ int en = (int)(len * cd.maxCharsPerByte());
+ char[] ca = new char[en];
+ if (len == 0)
+ return ca;
+ cd.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
+
+ ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+ CharBuffer cb = CharBuffer.wrap(ca);
+ try {
+ CoderResult cr = cd.decode(bb, cb, true);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ cr = cd.flush(cb);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ } catch (CharacterCodingException x) {
+ throw new Error(x);
+ }
+ return Arrays.copyOf(ca, cb.position());
+ }
+
static byte[] encode(char[] cc, String csn, boolean testDirect)
throws Exception {
ByteBuffer bbf;
@@ -142,7 +168,14 @@ public class TestUTF8 {
bb = encode(cc, csn, true);
ccO = decode(bb, csn, true);
if (!Arrays.equals(cc, ccO)) {
- System.out.printf(" (direct) failed");
+ System.out.print(" (direct) failed");
+ }
+ // String.getBytes(csn)/new String(bytes, csn) go through the ArrayEncoder/ArrayDecoder path
+ if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
+ System.out.printf(" String.getBytes() failed");
+ }
+ if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+ System.out.printf(" String.toCharArray() failed");
}
System.out.println();
}
@@ -168,6 +201,12 @@ public class TestUTF8 {
if (!Arrays.equals(cc, ccO)) {
System.out.printf(" decoding(direct) failed%n");
}
+ // new String(bb, csn).getBytes(csn) will not return
+ // the 6 bytes surrogates as in bb, so only test
+ // toCharArray() here.
+ if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+ System.out.printf(" String.toCharArray() failed");
+ }
}
static void compare(String csn1, String csn2) throws Exception {
@@ -274,6 +313,7 @@ public class TestUTF8 {
static void checkMalformed(String csn) throws Exception {
boolean failed = false;
System.out.printf(" Check malformed <%s>...%n", csn);
+ Charset cs = Charset.forName(csn);
for (boolean direct: new boolean[] {false, true}) {
for (byte[] bins : malformed) {
int mlen = bins[0];
@@ -285,10 +325,15 @@ public class TestUTF8 {
ashex += Integer.toBinaryString((int)bin[i] & 0xff);
}
if (!cr.isMalformed()) {
- System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
+ System.out.printf(" FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
failed = true;
} else if (cr.length() != mlen) {
- System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
+ System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
+ failed = true;
+ }
+ if (!Arrays.equals(decode(cs, bin, 0, bin.length),
+ new String(bin, csn).toCharArray())) {
+ System.out.printf(" FAIL(new String(bb, %s)) failed%n", csn);
failed = true;
}
}