7040220: java/char_encoding Optimize UTF-8 charset for String.getBytes()/new String(byte[])

Summary: implement sun.nio.cs.ArrayEncoder/ArrayDecoder in UTF-8. Reviewed-by: alanb
author: sherman <none@none> 2011-05-02 11:42:52 -0700
committer: sherman <none@none> 2011-05-02 11:42:52 -0700
commit: 7b333f5281700ff39fffb248f5b61ea3116e7f38 (patch)
tree: 8de22bcf3c59ba9bc850fca40d01cc0a30495027 /test/sun/nio
parent: e344ad61532ac700a5db6ab9c1ca832891c28cf9 (diff)
4 files changed, 310 insertions, 5 deletions
diff --git a/test/sun/nio/cs/StrCodingBenchmarkUTF8.java b/test/sun/nio/cs/StrCodingBenchmarkUTF8.java
new file mode 100644
index 000000000..eecc6ec2d
--- /dev/null
+++ b/test/sun/nio/cs/StrCodingBenchmarkUTF8.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class StrCodingBenchmarkUTF8 {
+    // Runs String decode/encode jobs for UTF-8, by charset name (csn) and by Charset object (cs).
+    public static void main(String[] args) throws Throwable {
+        // "iterations" and "subsize" come from system properties; subsize is not used further in this method.
+        final int itrs = Integer.getInteger("iterations", 100000);
+        final int size = 2048;
+        final int subsize    = Integer.getInteger("subsize", 128);
+        final Random rnd = new Random();
+        final int maxchar    = 0x7f;
+
+        Charset charset = Charset.forName("UTF-8");
+        final String csn = charset.name();
+        final Charset cs = charset;
+        // starts[nb - 1] is the lowest code point that UTF-8 encodes in nb bytes
+        int[] starts = new int[] { 0, 0x80, 0x800, 0x10000};
+        for (int nb = 1; nb <= 4; nb++) {
+
+            final CharsetEncoder enc = cs.newEncoder(); // not referenced again in this loop body
+            // fill cc with random code points in [start, start + maxchar); untouched tail slots stay zero
+            char[] cc = new char[size];
+            int i = 0;
+            while (i < size - 3) {
+                i += Character.toChars(starts[nb - 1] + rnd.nextInt(maxchar), cc, i);
+            }
+
+            final String string = new String(cc);
+            final byte[] bytes  = string.getBytes(cs);
+            // for each length sz = 12, 24, 48, ... below size, run the four jobs on the first sz bytes
+            System.out.printf("%n--------%s[nb=%d]---------%n", csn, nb);
+            int sz = 12;
+            while (sz < size) {
+                System.out.printf("   [len=%d]%n", sz);
+                final byte[] bs  = Arrays.copyOf(bytes, sz); // the cut may fall inside a multi-byte sequence
+                final String str = new String(bs, csn);
+                StrCodingBenchmark.Job[] jobs = {
+                    new StrCodingBenchmark.Job("String decode: csn") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                            new String(bs, csn);
+                    }},
+
+                    new StrCodingBenchmark.Job("String decode: cs") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                            new String(bs, cs);
+                    }},
+
+                    new StrCodingBenchmark.Job("String encode: csn") {
+                    public void work() throws Throwable {
+                        for (int i = 0; i < itrs; i++)
+                                str.getBytes(csn);
+                    }},
+
+                    new StrCodingBenchmark.Job("String encode: cs") {
+                    public void work() throws Throwable {
+                         for (int i = 0; i < itrs; i++)
+                          str.getBytes(cs);
+                    }},
+                };
+                StrCodingBenchmark.time(StrCodingBenchmark.filter(null, jobs));
+                sz <<= 1;
+            }
+        }
+    }
+}
diff --git a/test/sun/nio/cs/TestStringCoding.java b/test/sun/nio/cs/TestStringCoding.java
index 4ab707310..c4837e956 100644
--- a/test/sun/nio/cs/TestStringCoding.java
+++ b/test/sun/nio/cs/TestStringCoding.java
@@ -24,7 +24,7 @@
  */
 
 /* @test
-   @bug 6636323 6636319
+   @bug 6636323 6636319 7040220
    @summary Test if StringCoding and NIO result have the same de/encoding result
  * @run main/othervm/timeout=2000 TestStringCoding
  */
@@ -111,6 +111,8 @@ public class TestStringCoding {
         //encode unmappable surrogates
         if (enc instanceof sun.nio.cs.ArrayEncoder &&
             cs.contains(Charset.forName("ASCII"))) {
+            if (cs.name().equals("UTF-8"))    // utf8 handles surrogates
+                return;
             enc.replaceWith(new byte[] { (byte)'A'});
             sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
 
diff --git a/test/sun/nio/cs/TestStringCodingUTF8.java b/test/sun/nio/cs/TestStringCodingUTF8.java
new file mode 100644
index 000000000..fdc204849
--- /dev/null
+++ b/test/sun/nio/cs/TestStringCodingUTF8.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test
+   @bug 7040220
+   @summary Test if StringCoding and NIO result have the same de/encoding result for UTF-8
+ * @run main/othervm/timeout=2000 TestStringCodingUTF8
+ */
+
+import java.util.*;
+import java.nio.*;
+import java.nio.charset.*;
+
+public class TestStringCodingUTF8 { // compares String's UTF-8 coding with direct CharsetEncoder/CharsetDecoder use
+    public static void main(String[] args) throws Throwable {
+        test();
+        // run the same checks again with a security manager installed
+        System.setSecurityManager(new PermissiveSecurityManger());
+        test();
+    }
+    // Drives the checks: whole BMP, shuffled code points 0..0x1FFFF, random slices of those, then random bytes.
+    static void test() throws Throwable {
+        Charset cs = Charset.forName("UTF-8");
+        char[] bmp = new char[0x10000];
+        for (int i = 0; i < 0x10000; i++) {
+            bmp[i] = (char)i;
+        }
+        test(cs, bmp, 0, bmp.length);
+        // code points 0..0x1FFFF in shuffled order: 0x10000 one-char + 0x10000 two-char = 0x30000 chars
+        ArrayList<Integer> list = new ArrayList<>(0x20000);
+        for (int i = 0; i < 0x20000; i++) {
+            list.add(i, i);
+        }
+        Collections.shuffle(list);
+        int j = 0;
+        char[] bmpsupp = new char[0x30000];
+        for (int i = 0; i < 0x20000; i++) {
+            j += Character.toChars(list.get(i), bmpsupp, j);
+        }
+        assert (j == bmpsupp.length);
+        test(cs, bmpsupp, 0, bmpsupp.length);
+
+        // random "off" and "len" into the shuffled data (a slice may start or end inside a surrogate pair)
+        Random rnd = new Random();
+        int maxlen = 1000;
+        int itr = 5000;
+        for (int i = 0; i < itr; i++) {
+            int off = rnd.nextInt(bmpsupp.length - maxlen);
+            int len = rnd.nextInt(maxlen);
+            test(cs, bmpsupp, off, len);
+        }
+
+        // random byte content of random length: both String constructors must agree with decode() on it
+        for (int i = 0; i < itr; i++) {
+            byte[] ba = new byte[rnd.nextInt(maxlen)];
+            rnd.nextBytes(ba);
+            // new String(byte[], String charsetName)
+            if (!new String(ba, cs.name()).equals(
+                 new String(decode(cs, ba, 0, ba.length))))
+                throw new RuntimeException("new String(csn) failed");
+            // new String(byte[], Charset)
+            if (!new String(ba, cs).equals(
+                 new String(decode(cs, ba, 0, ba.length))))
+                throw new RuntimeException("new String(cs) failed");
+        }
+        System.out.println("done!");
+    }
+    // Checks String.getBytes()/new String() against the encode()/decode() helpers for ca[off, off + len).
+    static void test(Charset cs, char[] ca, int off, int len) throws Throwable {
+        String str = new String(ca, off, len);
+        byte[] ba = encode(cs, ca, off, len);
+
+        // String.getBytes(String charsetName)
+        byte[] baStr = str.getBytes(cs.name());
+        if (!Arrays.equals(ba, baStr))
+            throw new RuntimeException("getBytes(csn) failed");
+
+        // String.getBytes(Charset)
+        baStr = str.getBytes(cs);
+        if (!Arrays.equals(ba, baStr))
+            throw new RuntimeException("getBytes(cs) failed");
+
+        // new String(byte[], String charsetName)
+        if (!new String(ba, cs.name()).equals(new String(decode(cs, ba, 0, ba.length))))
+            throw new RuntimeException("new String(csn) failed");
+
+        // new String(byte[], Charset)
+        if (!new String(ba, cs).equals(new String(decode(cs, ba, 0, ba.length))))
+            throw new RuntimeException("new String(cs) failed");
+    }
+
+    // Reference decoder driving CharsetDecoder directly (copy/paste of the StringCoding.decode()).
+    static char[] decode(Charset cs, byte[] ba, int off, int len) {
+        CharsetDecoder cd = cs.newDecoder();
+        int en = (int)(len * cd.maxCharsPerByte()); // output capacity: len scaled by the decoder's max chars per byte
+        char[] ca = new char[en];
+        if (len == 0)
+            return ca;
+        cd.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
+
+        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+        CharBuffer cb = CharBuffer.wrap(ca);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true); // true: no more input follows
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);
+        }
+        return Arrays.copyOf(ca, cb.position()); // keep only the chars written
+    }
+
+    // Reference encoder driving CharsetEncoder directly (copy/paste of the StringCoding.encode()).
+    static byte[] encode(Charset cs, char[] ca, int off, int len) {
+        CharsetEncoder ce = cs.newEncoder();
+        int en = (int)(len * ce.maxBytesPerChar()); // output capacity: len scaled by the encoder's max bytes per char
+        byte[] ba = new byte[en];
+        if (len == 0)
+            return ba;
+        ce.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
+        ByteBuffer bb = ByteBuffer.wrap(ba);
+        CharBuffer cb = CharBuffer.wrap(ca, off, len);
+        try {
+            CoderResult cr = ce.encode(cb, bb, true); // true: no more input follows
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = ce.flush(bb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);
+        }
+        return Arrays.copyOf(ba, bb.position()); // keep only the bytes written
+    }
+
+    static class PermissiveSecurityManger extends SecurityManager {
+        @Override public void checkPermission(java.security.Permission p) {} // no-op, so this check never throws
+    }
+}
diff --git a/test/sun/nio/cs/TestUTF8.java b/test/sun/nio/cs/TestUTF8.java
index 34a999fb2..f339eae04 100644
--- a/test/sun/nio/cs/TestUTF8.java
+++ b/test/sun/nio/cs/TestUTF8.java
@@ -23,7 +23,7 @@
 
 /*
  * @test
- * @bug 4486841
+ * @bug 4486841 7040220
  * @summary Test UTF-8 charset
  */
 
@@ -70,6 +70,32 @@ public class TestUTF8 {
         return dec.decode(bbf, cbf, true);
     }
 
+    // Reference decoder driving CharsetDecoder directly (copy/paste of the StringCoding.decode()).
+    static char[] decode(Charset cs, byte[] ba, int off, int len) {
+        CharsetDecoder cd = cs.newDecoder();
+        int en = (int)(len * cd.maxCharsPerByte()); // output capacity: len scaled by the decoder's max chars per byte
+        char[] ca = new char[en];
+        if (len == 0)
+            return ca;
+        cd.onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .reset();
+
+        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
+        CharBuffer cb = CharBuffer.wrap(ca);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true); // true: no more input follows
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);
+        }
+        return Arrays.copyOf(ca, cb.position()); // keep only the chars written
+    }
+
     static byte[] encode(char[] cc, String csn, boolean testDirect)
         throws Exception {
         ByteBuffer bbf;
@@ -142,7 +168,14 @@ public class TestUTF8 {
         bb = encode(cc, csn, true);
         ccO = decode(bb, csn, true);
         if (!Arrays.equals(cc, ccO)) {
-            System.out.printf("    (direct) failed");
+            System.out.print("    (direct) failed");
+        }
+        // String.getBytes()/toCharArray() goes to ArrayDe/Encoder path
+        if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
+            System.out.printf("    String.getBytes() failed");
+        }
+        if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+            System.out.printf("    String.toCharArray() failed");
         }
         System.out.println();
     }
@@ -168,6 +201,12 @@ public class TestUTF8 {
         if (!Arrays.equals(cc, ccO)) {
             System.out.printf("    decoding(direct) failed%n");
         }
+        // new String(bb, csn).getBytes(csn) will not return
+        // the 6 bytes surrogates as in bb, so only test
+        // toCharArray() here.
+        if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
+            System.out.printf("    String.toCharArray() failed");
+        }
     }
 
     static void compare(String csn1, String csn2) throws Exception {
@@ -274,6 +313,7 @@ public class TestUTF8 {
     static void checkMalformed(String csn) throws Exception {
         boolean failed = false;
         System.out.printf("    Check malformed <%s>...%n", csn);
+        Charset cs = Charset.forName(csn);
         for (boolean direct: new boolean[] {false, true}) {
             for (byte[] bins : malformed) {
                 int mlen = bins[0];
@@ -285,10 +325,15 @@ public class TestUTF8 {
                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
                 }
                 if (!cr.isMalformed()) {
-                    System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
+                    System.out.printf("        FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
                     failed = true;
                 } else if (cr.length() != mlen) {
-                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
+                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
+                    failed = true;
+                }
+                if (!Arrays.equals(decode(cs, bin, 0, bin.length),
+                                   new String(bin, csn).toCharArray())) {
+                    System.out.printf("        FAIL(new String(bb, %s)) failed%n", csn);
                     failed = true;
                 }
             }
author	sherman <none@none>	2011-05-02 11:42:52 -0700
committer	sherman <none@none>	2011-05-02 11:42:52 -0700
commit	7b333f5281700ff39fffb248f5b61ea3116e7f38 (patch)
tree	8de22bcf3c59ba9bc850fca40d01cc0a30495027 /test/sun/nio
parent	e344ad61532ac700a5db6ab9c1ca832891c28cf9 (diff)