aboutsummaryrefslogtreecommitdiff
path: root/test/sun/nio
diff options
context:
space:
mode:
author: sherman <none@none> 2011-11-07 13:46:02 -0800
committer: sherman <none@none> 2011-11-07 13:46:02 -0800
commit: edad9bca37946f4ba65f9a2d42f3463b8ad05724 (patch)
tree: af6030f416896c8d43c532bfe262c891f10ac70e /test/sun/nio
parent: 140f521de776d78a01663b4f46c51fd10b1fe6f8 (diff)
7096080: UTF8 update and new CESU-8 charset
7082884: Incorrect UTF8 conversion for sequence ED 31
7082883: Incorrect UTF8 conversion for sequence fc 80 80 8f bf bf
Summary: Updated UTF8 and added CESU-8 to follow the latest Standard
Reviewed-by: alanb
Diffstat (limited to 'test/sun/nio')
-rw-r--r-- test/sun/nio/cs/TestStringCoding.java | 6
-rw-r--r-- test/sun/nio/cs/TestStringCodingUTF8.java | 10
-rw-r--r-- test/sun/nio/cs/TestUTF8.java | 187
3 files changed, 164 insertions, 39 deletions
diff --git a/test/sun/nio/cs/TestStringCoding.java b/test/sun/nio/cs/TestStringCoding.java
index c4837e956..09e614448 100644
--- a/test/sun/nio/cs/TestStringCoding.java
+++ b/test/sun/nio/cs/TestStringCoding.java
@@ -24,7 +24,7 @@
*/
/* @test
- @bug 6636323 6636319 7040220
+ @bug 6636323 6636319 7040220 7096080
@summary Test if StringCoding and NIO result have the same de/encoding result
* @run main/othervm/timeout=2000 TestStringCoding
*/
@@ -111,7 +111,8 @@ public class TestStringCoding {
//encode unmappable surrogates
if (enc instanceof sun.nio.cs.ArrayEncoder &&
cs.contains(Charset.forName("ASCII"))) {
- if (cs.name().equals("UTF-8")) // utf8 handles surrogates
+ if (cs.name().equals("UTF-8") || // utf8 handles surrogates
+ cs.name().equals("CESU-8")) // utf8 handles surrogates
return;
enc.replaceWith(new byte[] { (byte)'A'});
sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
@@ -136,7 +137,6 @@ public class TestStringCoding {
cs.name())))
throw new RuntimeException("encode3(surrogates) failed -> "
+ cs.name());
-
ba = new byte[str.length() - 1];
n = cae.encode(str.toCharArray(), 0, str.length(), ba);
if (n != 7 || !"abABABc".equals(new String(ba, 0, n,
diff --git a/test/sun/nio/cs/TestStringCodingUTF8.java b/test/sun/nio/cs/TestStringCodingUTF8.java
index fdc204849..d1f699506 100644
--- a/test/sun/nio/cs/TestStringCodingUTF8.java
+++ b/test/sun/nio/cs/TestStringCodingUTF8.java
@@ -33,14 +33,16 @@ import java.nio.charset.*;
public class TestStringCodingUTF8 {
public static void main(String[] args) throws Throwable {
- test();
+ test("UTF-8");
+ test("CESU-8");
// security manager on
System.setSecurityManager(new PermissiveSecurityManger());
- test();
+ test("UTF-8");
+ test("CESU-8");
}
- static void test() throws Throwable {
- Charset cs = Charset.forName("UTF-8");
+ static void test(String csn) throws Throwable {
+ Charset cs = Charset.forName(csn);
char[] bmp = new char[0x10000];
for (int i = 0; i < 0x10000; i++) {
bmp[i] = (char)i;
diff --git a/test/sun/nio/cs/TestUTF8.java b/test/sun/nio/cs/TestUTF8.java
index f339eae04..e83f8fbb5 100644
--- a/test/sun/nio/cs/TestUTF8.java
+++ b/test/sun/nio/cs/TestUTF8.java
@@ -23,7 +23,7 @@
/*
* @test
- * @bug 4486841 7040220
+ * @bug 4486841 7040220 7096080
* @summary Test UTF-8 charset
*/
@@ -156,15 +156,22 @@ public class TestUTF8 {
return 3;
}
+ static int to4ByteUTF8(int uc, byte[] bb, int pos) {
+ bb[pos++] = (byte)(0xf0 | ((uc >> 18)));
+ bb[pos++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
+ bb[pos++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
+ bb[pos++] = (byte)(0x80 | (uc & 0x3f));
+ return 4;
+ }
+
static void checkRoundtrip(String csn) throws Exception {
System.out.printf(" Check roundtrip <%s>...", csn);
char[] cc = getUTFChars();
byte[] bb = encode(cc, csn, false);
char[] ccO = decode(bb, csn, false);
- if (!Arrays.equals(cc, ccO)) {
+ if (!Arrays.equals(cc, ccO))
System.out.printf(" non-direct failed");
- }
bb = encode(cc, csn, true);
ccO = decode(bb, csn, true);
if (!Arrays.equals(cc, ccO)) {
@@ -180,19 +187,24 @@ public class TestUTF8 {
System.out.println();
}
- static void check6ByteSurrs(String csn) throws Exception {
- System.out.printf(" Check 6-byte Surrogates <%s>...%n", csn);
- byte[] bb = new byte[(0x110000 - 0x10000) * 6];
+ static void check4ByteSurrs(String csn) throws Exception {
+ System.out.printf(" Check 4-byte Surrogates <%s>...%n", csn);
+ byte[] bb = new byte[(0x110000 - 0x10000) * 4];
char[] cc = new char[(0x110000 - 0x10000) * 2];
int bpos = 0;
int cpos = 0;
for (int i = 0x10000; i < 0x110000; i++) {
Character.toChars(i, cc, cpos);
- bpos += to3ByteUTF8(cc[cpos], bb, bpos);
- bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
+ bpos += to4ByteUTF8(i, bb, bpos);
cpos += 2;
}
+ checkSurrs(csn, bb, cc);
+ }
+
+ static void checkSurrs(String csn, byte[] bb, char[] cc)
+ throws Exception
+ {
char[] ccO = decode(bb, csn, false);
if (!Arrays.equals(cc, ccO)) {
System.out.printf(" decoding failed%n");
@@ -201,14 +213,30 @@ public class TestUTF8 {
if (!Arrays.equals(cc, ccO)) {
System.out.printf(" decoding(direct) failed%n");
}
- // new String(bb, csn).getBytes(csn) will not return
- // the 6 bytes surrogates as in bb, so only test
- // toCharArray() here.
if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
System.out.printf(" String.toCharArray() failed");
}
+ if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
+ System.out.printf(" String.getBytes() failed");
+ }
+ }
+
+ static void check6ByteSurrs(String csn) throws Exception {
+ System.out.printf(" Check 6-byte Surrogates <%s>...%n", csn);
+ byte[] bb = new byte[(0x110000 - 0x10000) * 6];
+ char[] cc = new char[(0x110000 - 0x10000) * 2];
+ int bpos = 0;
+ int cpos = 0;
+ for (int i = 0x10000; i < 0x110000; i++) {
+ Character.toChars(i, cc, cpos);
+ bpos += to3ByteUTF8(cc[cpos], bb, bpos);
+ bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
+ cpos += 2;
+ }
+ checkSurrs(csn, bb, cc);
}
+
static void compare(String csn1, String csn2) throws Exception {
System.out.printf(" Diff <%s> <%s>...%n", csn1, csn2);
char[] cc = getUTFChars();
@@ -266,6 +294,10 @@ public class TestUTF8 {
{1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
{1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
{1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
+ {1, (byte)0xE0, (byte)0x41,}, // invalid second byte & 2 bytes
+ {3, (byte)0xED, (byte)0xAE, (byte)0x80 }, // 3 bytes surrogate
+ {3, (byte)0xED, (byte)0xB0, (byte)0x80 }, // 3 bytes surrogate
+
// Four-byte sequences
{1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
@@ -276,8 +308,13 @@ public class TestUTF8 {
{1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
{1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid second byte
{1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
+ {1, (byte)0xF0, (byte)41 }, // invalid second byte
+ // & only 2 bytes
+
{2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
- {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid third byte
+ {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid fourth byte
+ {2, (byte)0xF0, (byte)0x90, (byte)0x41 }, // invalid third byte
+ // & 3 bytes input
{1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
{2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
@@ -287,30 +324,113 @@ public class TestUTF8 {
{1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
// Five-byte sequences
- {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid first byte
- {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
- {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
- {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
- {5, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid first byte
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
+ {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
{1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
- {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
- {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
- {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
// Six-byte sequences
- {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
- {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
- {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
- {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
{1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
- {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
- {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
- {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
- {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
};
- static void checkMalformed(String csn) throws Exception {
+ // The first byte is the length of malformed bytes
+ static byte[][] malformed_cesu8 = {
+ // One-byte sequences:
+ {1, (byte)0xFF },
+ {1, (byte)0xC0 },
+ {1, (byte)0x80 },
+
+ {1, (byte)0xFF, (byte)0xFF}, // all ones
+ {1, (byte)0xA0, (byte)0x80}, // 101x first byte first nibble
+
+ // Two-byte sequences:
+ {1, (byte)0xC0, (byte)0x80}, // invalid first byte
+ {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
+ {1, (byte)0xC2, (byte)0x00}, // invalid second byte
+ {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
+ {1, (byte)0xD0, (byte)0x00}, // invalid second byte
+ {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
+ {1, (byte)0xDF, (byte)0x00}, // invalid second byte
+ {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
+
+ // Three-byte sequences
+ {1, (byte)0xE0, (byte)0x80, (byte)0x80}, // 111x first byte first nibble
+ {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
+ {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
+ {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
+
+ {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
+ {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
+ {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
+ {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
+ {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
+ {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
+ {1, (byte)0xE0, (byte)0x41,}, // invalid second byte & 2 bytes
+
+ // CESU-8 does not have 4-, 5- or 6-byte sequences
+ // Four-byte sequences
+ {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
+ {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
+ {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
+ {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
+
+ {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
+ {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid second byte
+ {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
+ {1, (byte)0xF0, (byte)41 }, // invalid second byte
+ // & only 2 bytes
+ {1, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
+ {1, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid fourth byte
+ {1, (byte)0xF0, (byte)0x90, (byte)0x41 }, // invalid third byte
+ // & 3 bytes input
+
+ {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
+ {1, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
+ {1, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid fourth byte
+ {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
+ {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
+ {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
+
+ // Five-byte sequences
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid first byte
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
+ {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
+
+ {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
+ {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
+
+ // Six-byte sequences
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
+ {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
+ {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
+ {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
+ };
+
+
+ static void checkMalformed(String csn, byte[][] malformed) throws Exception {
boolean failed = false;
System.out.printf(" Check malformed <%s>...%n", csn);
Charset cs = Charset.forName(csn);
@@ -430,9 +550,12 @@ public class TestUTF8 {
public static void main(String[] args) throws Exception {
checkRoundtrip("UTF-8");
- check6ByteSurrs("UTF-8");
- //compare("UTF-8", "UTF-8-OLD");
- checkMalformed("UTF-8");
+ check4ByteSurrs("UTF-8");
+ checkMalformed("UTF-8", malformed);
checkUnderOverflow("UTF-8");
+
+ checkRoundtrip("CESU-8");
+ check6ByteSurrs("CESU-8");
+ checkMalformed("CESU-8", malformed_cesu8);
}
}