diff options
author | sherman <none@none> | 2011-05-02 11:42:52 -0700 |
---|---|---|
committer | sherman <none@none> | 2011-05-02 11:42:52 -0700 |
commit | 7b333f5281700ff39fffb248f5b61ea3116e7f38 (patch) | |
tree | 8de22bcf3c59ba9bc850fca40d01cc0a30495027 /test/sun/nio | |
parent | e344ad61532ac700a5db6ab9c1ca832891c28cf9 (diff) |
7040220: java/char_encoding: Optimize UTF-8 charset for String.getBytes()/new String(byte[])
Summary: implement sun.nio.cs.ArrayEn/Decoder in utf8
Reviewed-by: alanb
Diffstat (limited to 'test/sun/nio')
-rw-r--r-- | test/sun/nio/cs/StrCodingBenchmarkUTF8.java | 92 | ||||
-rw-r--r-- | test/sun/nio/cs/TestStringCoding.java | 4 | ||||
-rw-r--r-- | test/sun/nio/cs/TestStringCodingUTF8.java | 166 | ||||
-rw-r--r-- | test/sun/nio/cs/TestUTF8.java | 53 |
4 files changed, 310 insertions, 5 deletions
diff --git a/test/sun/nio/cs/StrCodingBenchmarkUTF8.java b/test/sun/nio/cs/StrCodingBenchmarkUTF8.java new file mode 100644 index 000000000..eecc6ec2d --- /dev/null +++ b/test/sun/nio/cs/StrCodingBenchmarkUTF8.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import java.util.*; +import java.nio.*; +import java.nio.charset.*; + +public class StrCodingBenchmarkUTF8 { + + public static void main(String[] args) throws Throwable { + + final int itrs = Integer.getInteger("iterations", 100000); + final int size = 2048; + final int subsize = Integer.getInteger("subsize", 128); + final Random rnd = new Random(); + final int maxchar = 0x7f; + + Charset charset = Charset.forName("UTF-8"); + final String csn = charset.name(); + final Charset cs = charset; + + int[] starts = new int[] { 0, 0x80, 0x800, 0x10000}; + for (int nb = 1; nb <= 4; nb++) { + + final CharsetEncoder enc = cs.newEncoder(); + + char[] cc = new char[size]; + int i = 0; + while (i < size - 3) { + i += Character.toChars(starts[nb - 1] + rnd.nextInt(maxchar), cc, i); + } + + final String string = new String(cc); + final byte[] bytes = string.getBytes(cs); + + System.out.printf("%n--------%s[nb=%d]---------%n", csn, nb); + int sz = 12; + while (sz < size) { + System.out.printf(" [len=%d]%n", sz); + final byte[] bs = Arrays.copyOf(bytes, sz); + final String str = new String(bs, csn); + StrCodingBenchmark.Job[] jobs = { + new StrCodingBenchmark.Job("String decode: csn") { + public void work() throws Throwable { + for (int i = 0; i < itrs; i++) + new String(bs, csn); + }}, + + new StrCodingBenchmark.Job("String decode: cs") { + public void work() throws Throwable { + for (int i = 0; i < itrs; i++) + new String(bs, cs); + }}, + + new StrCodingBenchmark.Job("String encode: csn") { + public void work() throws Throwable { + for (int i = 0; i < itrs; i++) + str.getBytes(csn); + }}, + + new StrCodingBenchmark.Job("String encode: cs") { + public void work() throws Throwable { + for (int i = 0; i < itrs; i++) + str.getBytes(cs); + }}, + }; + StrCodingBenchmark.time(StrCodingBenchmark.filter(null, jobs)); + sz <<= 1; + } + } + } +} diff --git a/test/sun/nio/cs/TestStringCoding.java b/test/sun/nio/cs/TestStringCoding.java index 4ab707310..c4837e956 100644 --- 
a/test/sun/nio/cs/TestStringCoding.java +++ b/test/sun/nio/cs/TestStringCoding.java @@ -24,7 +24,7 @@ */ /* @test - @bug 6636323 6636319 + @bug 6636323 6636319 7040220 @summary Test if StringCoding and NIO result have the same de/encoding result * @run main/othervm/timeout=2000 TestStringCoding */ @@ -111,6 +111,8 @@ public class TestStringCoding { //encode unmappable surrogates if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) { + if (cs.name().equals("UTF-8")) // utf8 handles surrogates + return; enc.replaceWith(new byte[] { (byte)'A'}); sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc; diff --git a/test/sun/nio/cs/TestStringCodingUTF8.java b/test/sun/nio/cs/TestStringCodingUTF8.java new file mode 100644 index 000000000..fdc204849 --- /dev/null +++ b/test/sun/nio/cs/TestStringCodingUTF8.java @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* @test + @bug 7040220 + @summary Test if StringCoding and NIO result have the same de/encoding result for UTF-8 + * @run main/othervm/timeout=2000 TestStringCodingUTF8 + */ + +import java.util.*; +import java.nio.*; +import java.nio.charset.*; + +public class TestStringCodingUTF8 { + public static void main(String[] args) throws Throwable { + test(); + // security manager on + System.setSecurityManager(new PermissiveSecurityManger()); + test(); + } + + static void test() throws Throwable { + Charset cs = Charset.forName("UTF-8"); + char[] bmp = new char[0x10000]; + for (int i = 0; i < 0x10000; i++) { + bmp[i] = (char)i; + } + test(cs, bmp, 0, bmp.length); + + ArrayList<Integer> list = new ArrayList<>(0x20000); + for (int i = 0; i < 0x20000; i++) { + list.add(i, i); + } + Collections.shuffle(list); + int j = 0; + char[] bmpsupp = new char[0x30000]; + for (int i = 0; i < 0x20000; i++) { + j += Character.toChars(list.get(i), bmpsupp, j); + } + assert (j == bmpsupp.length); + test(cs, bmpsupp, 0, bmpsupp.length); + + // randomed "off" and "len" on shuffled data + Random rnd = new Random(); + int maxlen = 1000; + int itr = 5000; + for (int i = 0; i < itr; i++) { + int off = rnd.nextInt(bmpsupp.length - maxlen); + int len = rnd.nextInt(maxlen); + test(cs, bmpsupp, off, len); + } + + // random length of bytes, test the edge corner case + for (int i = 0; i < itr; i++) { + byte[] ba = new byte[rnd.nextInt(maxlen)]; + rnd.nextBytes(ba); + //new String(csn); + if (!new String(ba, cs.name()).equals( + new String(decode(cs, ba, 0, ba.length)))) + throw new RuntimeException("new String(csn) failed"); + //new String(cs); + if (!new String(ba, cs).equals( + new String(decode(cs, ba, 0, ba.length)))) + throw new RuntimeException("new String(cs) failed"); + } + System.out.println("done!"); + } + + static void test(Charset cs, char[] ca, int off, int len) throws Throwable { + String str = new String(ca, off, len); + byte[] ba = encode(cs, ca, off, len); + + //getBytes(csn); 
+ byte[] baStr = str.getBytes(cs.name()); + if (!Arrays.equals(ba, baStr)) + throw new RuntimeException("getBytes(csn) failed"); + + //getBytes(cs); + baStr = str.getBytes(cs); + if (!Arrays.equals(ba, baStr)) + throw new RuntimeException("getBytes(cs) failed"); + + //new String(csn); + if (!new String(ba, cs.name()).equals(new String(decode(cs, ba, 0, ba.length)))) + throw new RuntimeException("new String(csn) failed"); + + //new String(cs); + if (!new String(ba, cs).equals(new String(decode(cs, ba, 0, ba.length)))) + throw new RuntimeException("new String(cs) failed"); + } + + // copy/paste of the StringCoding.decode() + static char[] decode(Charset cs, byte[] ba, int off, int len) { + CharsetDecoder cd = cs.newDecoder(); + int en = (int)(len * cd.maxCharsPerByte()); + char[] ca = new char[en]; + if (len == 0) + return ca; + cd.onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .reset(); + + ByteBuffer bb = ByteBuffer.wrap(ba, off, len); + CharBuffer cb = CharBuffer.wrap(ca); + try { + CoderResult cr = cd.decode(bb, cb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = cd.flush(cb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new Error(x); + } + return Arrays.copyOf(ca, cb.position()); + } + + // copy/paste of the StringCoding.encode() + static byte[] encode(Charset cs, char[] ca, int off, int len) { + CharsetEncoder ce = cs.newEncoder(); + int en = (int)(len * ce.maxBytesPerChar()); + byte[] ba = new byte[en]; + if (len == 0) + return ba; + ce.onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .reset(); + ByteBuffer bb = ByteBuffer.wrap(ba); + CharBuffer cb = CharBuffer.wrap(ca, off, len); + try { + CoderResult cr = ce.encode(cb, bb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = ce.flush(bb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw 
new Error(x); + } + return Arrays.copyOf(ba, bb.position()); + } + + static class PermissiveSecurityManger extends SecurityManager { + @Override public void checkPermission(java.security.Permission p) {} + } +} diff --git a/test/sun/nio/cs/TestUTF8.java b/test/sun/nio/cs/TestUTF8.java index 34a999fb2..f339eae04 100644 --- a/test/sun/nio/cs/TestUTF8.java +++ b/test/sun/nio/cs/TestUTF8.java @@ -23,7 +23,7 @@ /* * @test - * @bug 4486841 + * @bug 4486841 7040220 * @summary Test UTF-8 charset */ @@ -70,6 +70,32 @@ public class TestUTF8 { return dec.decode(bbf, cbf, true); } + // copy/paste of the StringCoding.decode() + static char[] decode(Charset cs, byte[] ba, int off, int len) { + CharsetDecoder cd = cs.newDecoder(); + int en = (int)(len * cd.maxCharsPerByte()); + char[] ca = new char[en]; + if (len == 0) + return ca; + cd.onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .reset(); + + ByteBuffer bb = ByteBuffer.wrap(ba, off, len); + CharBuffer cb = CharBuffer.wrap(ca); + try { + CoderResult cr = cd.decode(bb, cb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = cd.flush(cb); + if (!cr.isUnderflow()) + cr.throwException(); + } catch (CharacterCodingException x) { + throw new Error(x); + } + return Arrays.copyOf(ca, cb.position()); + } + static byte[] encode(char[] cc, String csn, boolean testDirect) throws Exception { ByteBuffer bbf; @@ -142,7 +168,14 @@ public class TestUTF8 { bb = encode(cc, csn, true); ccO = decode(bb, csn, true); if (!Arrays.equals(cc, ccO)) { - System.out.printf(" (direct) failed"); + System.out.print(" (direct) failed"); + } + // String.getBytes()/toCharArray() goes to ArrayDe/Encoder path + if (!Arrays.equals(bb, new String(cc).getBytes(csn))) { + System.out.printf(" String.getBytes() failed"); + } + if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) { + System.out.printf(" String.toCharArray() failed"); } System.out.println(); } @@ -168,6 +201,12 @@ public class TestUTF8 { 
if (!Arrays.equals(cc, ccO)) { System.out.printf(" decoding(direct) failed%n"); } + // new String(bb, csn).getBytes(csn) will not return + // the 6 bytes surrogates as in bb, so only test + // toCharArray() here. + if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) { + System.out.printf(" String.toCharArray() failed"); + } } static void compare(String csn1, String csn2) throws Exception { @@ -274,6 +313,7 @@ public class TestUTF8 { static void checkMalformed(String csn) throws Exception { boolean failed = false; System.out.printf(" Check malformed <%s>...%n", csn); + Charset cs = Charset.forName(csn); for (boolean direct: new boolean[] {false, true}) { for (byte[] bins : malformed) { int mlen = bins[0]; @@ -285,10 +325,15 @@ public class TestUTF8 { ashex += Integer.toBinaryString((int)bin[i] & 0xff); } if (!cr.isMalformed()) { - System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex); + System.out.printf(" FAIL(direct=%b): [%s] not malformed.%n", direct, ashex); failed = true; } else if (cr.length() != mlen) { - System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); + System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length()); + failed = true; + } + if (!Arrays.equals(decode(cs, bin, 0, bin.length), + new String(bin, csn).toCharArray())) { + System.out.printf(" FAIL(new String(bb, %s)) failed%n", csn); failed = true; } } |