diff options
author | sherman <none@none> | 2009-05-21 23:32:46 -0700 |
---|---|---|
committer | sherman <none@none> | 2009-05-21 23:32:46 -0700 |
commit | cf9e1bb1cf6dc8eaf595c051d30b1990a6375974 (patch) | |
tree | 3e80cab740ee3e7adf55403f0fa1b25468ec7fbd /src/share/classes/sun/io | |
parent | c58b62084370a0c91e401bdcedf673923109b2fd (diff) |
6843578: Re-implement IBM doublebyte charsets
6639450: IBM949C encoder modifies state of IBM949 encoder
6569191: Cp943 io converter returns U+0000 and U+FFFD for unconvertable character
6577466: Character encoder IBM970 throws a BufferOverflowException
5065777: CharsetEncoder canEncode() methods often incorrectly return false
Summary: Re-write 11 IBM doublebyte charsets. Thanks Ulf.Zibis for the code review!
Reviewed-by: martin
Diffstat (limited to 'src/share/classes/sun/io')
39 files changed, 599 insertions, 3221 deletions
diff --git a/src/share/classes/sun/io/ByteToCharCp1381.java b/src/share/classes/sun/io/ByteToCharCp1381.java index 988de63f6..bd3d6b369 100644 --- a/src/share/classes/sun/io/ByteToCharCp1381.java +++ b/src/share/classes/sun/io/ByteToCharCp1381.java @@ -24,70 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM1381; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp1381 to Unicode. -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp1381 - extends ByteToCharDBCS_ASCII +public class ByteToCharCp1381 extends ByteToCharDBCS_ASCII { -{ + // Return the character set id + public String getCharacterEncoding() { + return "Cp1381"; + } - private final static IBM1381 nioCoder = new IBM1381(); - - // Return the character set id - public String getCharacterEncoding() - { - return "Cp1381"; - } - - - private static final boolean leadByte[] = { - false, false, false, false, false, false, false, false, // 00 - 07 - false, false, false, false, false, false, false, false, // 08 - 0F - false, false, false, false, false, false, false, false, // 10 - 17 - false, false, false, false, false, false, false, false, // 18 - 1F - false, false, false, false, false, false, false, false, // 20 - 27 - false, false, false, false, false, false, false, false, // 28 - 2F - false, false, false, false, false, false, false, false, // 30 - 37 - false, false, false, false, false, false, false, false, // 38 - 3F - false, false, false, false, false, false, false, false, // 40 - 47 - false, false, false, false, false, false, false, false, // 48 - 4F - false, false, false, false, false, false, false, false, // 50 - 57 - false, false, false, false, false, false, false, false, // 58 - 5F - false, false, false, false, false, false, false, false, // 60 - 67 - false, false, false, false, false, false, false, false, // 68 - 6F - false, false, false, false, false, false, false, false, // 70 - 77 - false, false, false, false, false, false, false, false, // 78 - 7F - 
false, false, false, false, false, false, false, false, // 80 - 87 - false, false, false, false, true, true, true, true, // 88 - 8F - true, true, true, true, true, true, true, true, // 90 - 97 - true, true, true, true, true, true, true, true, // 98 - 9F - true, true, true, true, true, true, true, true, // A0 - A7 - true, true, false, false, false, false, false, false, // A8 - AF - true, true, true, true, true, true, true, true, // B0 - B7 - true, true, true, true, true, true, true, true, // B8 - BF - true, true, true, true, true, true, true, true, // C0 - C7 - true, true, true, true, true, true, true, true, // C8 - CF - true, true, true, true, true, true, true, true, // D0 - D7 - true, true, true, true, true, true, true, true, // D8 - DF - true, true, true, true, true, true, true, true, // E0 - E7 - true, true, true, true, true, true, true, true, // E8 - EF - true, true, true, true, true, true, true, true, // F0 - F7 - false, false, false, false, false, false, false, false, // F8 - FF - }; - - public ByteToCharCp1381() { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.leadByte = this.leadByte; - super.singleByteToChar = nioCoder.getDecoderSingleByteMappings(); - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp1381() { + super((DoubleByte.Decoder)new IBM1381().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp1383.java b/src/share/classes/sun/io/ByteToCharCp1383.java index d9c96f38d..6c65a9cd3 100644 --- a/src/share/classes/sun/io/ByteToCharCp1383.java +++ b/src/share/classes/sun/io/ByteToCharCp1383.java @@ -24,31 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM1383; +import sun.nio.cs.ext.*; -/** -* A table to convert Cp1383 to Unicode -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp1383 - extends ByteToCharEUC +public class ByteToCharCp1383 extends ByteToCharEUC2 { -{ + // Return the character 
set id + public String getCharacterEncoding() { + return "Cp1383"; + } - private final static IBM1383 nioCoder = new IBM1383(); - - // Return the character set id - public String getCharacterEncoding() - { - return "Cp1383"; - } - - public ByteToCharCp1383() - { - // Set the correct mapping table - super(); - super.byteToCharTable = nioCoder.getDecoderSingleByteMappings(); - super.mappingTableG1 = nioCoder.getDecoderMappingTableG1(); - } + public ByteToCharCp1383() { + super((DoubleByte.Decoder)new IBM1383().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp834.java b/src/share/classes/sun/io/ByteToCharCp834.java index 1bb6e54f8..3d8d8dfeb 100644 --- a/src/share/classes/sun/io/ByteToCharCp834.java +++ b/src/share/classes/sun/io/ByteToCharCp834.java @@ -24,131 +24,15 @@ */ package sun.io; -import sun.nio.cs.ext.IBM933; +import sun.nio.cs.ext.*; + +public class ByteToCharCp834 extends ByteToCharDBCS_ASCII { -public class ByteToCharCp834 extends ByteToCharDBCS_ONLY_EBCDIC { public String getCharacterEncoding() { return "Cp834"; } public ByteToCharCp834() { - super(); - super.mask1 = 0xFFF0; - super.mask2 = 0x000F; - super.shift = 4; - super.index1 = IBM933.getDecoderIndex1(); - super.index2 = IBM933.getDecoderIndex2(); - } -} - -abstract class ByteToCharDBCS_ONLY_EBCDIC extends ByteToCharConverter { - private boolean savedBytePresent; - private byte savedByte; - - protected short index1[]; - protected String index2; - protected int mask1; - protected int mask2; - protected int shift; - - public ByteToCharDBCS_ONLY_EBCDIC() { - super(); - savedBytePresent = false; - } - - public int flush(char [] output, int outStart, int outEnd) - throws MalformedInputException - { - if (savedBytePresent) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - reset(); - return 0; - } - - - /** - * Character conversion - */ - public int convert(byte[] input, int inOff, int inEnd, - char[] output, int outOff, int outEnd) - throws 
UnknownCharacterException, MalformedInputException, - ConversionBufferFullException - { - int inputSize; - char outputChar = '\uFFFD'; - - charOff = outOff; - byteOff = inOff; - - while(byteOff < inEnd) { - int byte1, byte2; - int v; - - if (!savedBytePresent) { - byte1 = input[byteOff] & 0xff; - inputSize = 1; - } else { - byte1 = savedByte; - savedBytePresent = false; - inputSize = 0; - } - - // valid range of 1st bytes - if (byte1 < 0x40 || byte1 > 0xfe) { - badInputLength = 1; - throw new MalformedInputException(); - } - - if (byteOff + inputSize >= inEnd) { - // We have been split in the middle if a character - // save the first byte for next time around - savedByte = (byte)byte1; - savedBytePresent = true; - byteOff += inputSize; - break; - } - - byte2 = input[byteOff+inputSize] & 0xff; - inputSize++; - - // validate the pair of bytes - if ((byte1 != 0x40 || byte2 != 0x40) && - (byte2 < 0x41 || byte2 > 0xfe)) { - badInputLength = 2; - throw new MalformedInputException(); - } - - // Lookup in the two level index - v = byte1 * 256 + byte2; - outputChar = index2.charAt(index1[((v & mask1) >> shift)] - + (v & mask2)); - - if (outputChar == '\uFFFD') { - if (subMode) - outputChar = subChars[0]; - else { - badInputLength = inputSize; - throw new UnknownCharacterException(); - } - } - - if (charOff >= outEnd) - throw new ConversionBufferFullException(); - - output[charOff++] = outputChar; - byteOff += inputSize; - } - return charOff - outOff; - } - - /** - * Resets the converter. 
- */ - public void reset() { - charOff = byteOff = 0; - savedBytePresent = false; + super((DoubleByte.Decoder)new IBM834().newDecoder()); } } diff --git a/src/share/classes/sun/io/ByteToCharCp930.java b/src/share/classes/sun/io/ByteToCharCp930.java index 00e6c4fe5..78227f96e 100644 --- a/src/share/classes/sun/io/ByteToCharCp930.java +++ b/src/share/classes/sun/io/ByteToCharCp930.java @@ -24,32 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM930; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp930 to Unicode. -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp930 - extends ByteToCharDBCS_EBCDIC +public class ByteToCharCp930 extends ByteToCharDBCS_EBCDIC { -{ - private static IBM930 nioCoder = new IBM930(); - // Return the character set id + // Return the character set id + public String getCharacterEncoding() { + return "Cp930"; + } - public String getCharacterEncoding() - { - return "Cp930"; - } - - public ByteToCharCp930() { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.singleByteToChar = nioCoder.getDecoderSingleByteMappings(); - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp930() { + super((DoubleByte.Decoder)new IBM930().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp933.java b/src/share/classes/sun/io/ByteToCharCp933.java index ddf226ba9..763ccd385 100644 --- a/src/share/classes/sun/io/ByteToCharCp933.java +++ b/src/share/classes/sun/io/ByteToCharCp933.java @@ -24,33 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM933; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp933 to Unicode. 
-* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp933 - extends ByteToCharDBCS_EBCDIC +public class ByteToCharCp933 extends ByteToCharDBCS_EBCDIC { -{ - private final static IBM933 nioCoder = new IBM933(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp933"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp933"; - } - - - public ByteToCharCp933() { - super(); - super.mask1 = 0xFFF0; - super.mask2 = 0x000F; - super.shift = 4; - super.singleByteToChar = nioCoder.getDecoderSingleByteMappings(); - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp933() { + super((DoubleByte.Decoder)new IBM933().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp935.java b/src/share/classes/sun/io/ByteToCharCp935.java index d3b5d3e00..b80c5468c 100644 --- a/src/share/classes/sun/io/ByteToCharCp935.java +++ b/src/share/classes/sun/io/ByteToCharCp935.java @@ -24,33 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM935; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp935 to Unicode. 
-* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp935 - extends ByteToCharDBCS_EBCDIC +public class ByteToCharCp935 extends ByteToCharDBCS_EBCDIC { -{ - private static IBM935 nioCoder = new IBM935(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp935"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp935"; - } - - - public ByteToCharCp935() { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.singleByteToChar = nioCoder.getDecoderByteToCharMappings(); - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp935() { + super((DoubleByte.Decoder)new IBM935().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp937.java b/src/share/classes/sun/io/ByteToCharCp937.java index 40c45d32a..eb36ca758 100644 --- a/src/share/classes/sun/io/ByteToCharCp937.java +++ b/src/share/classes/sun/io/ByteToCharCp937.java @@ -23,33 +23,17 @@ * have any questions. */ package sun.io; -import sun.nio.cs.ext.IBM937; -/** -* Tables and data to convert Cp937 to Unicode. 
-* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp937 - extends ByteToCharDBCS_EBCDIC +import sun.nio.cs.ext.*; -{ - private final static IBM937 nioCoder = new IBM937(); +public class ByteToCharCp937 extends ByteToCharDBCS_EBCDIC { - // Return the character set id - public String getCharacterEncoding() - { - return "Cp937"; - } + // Return the character set id + public String getCharacterEncoding() { + return "Cp937"; + } - - public ByteToCharCp937() { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.singleByteToChar = nioCoder.getDecoderByteToCharMappings(); - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp937() { + super((DoubleByte.Decoder)new IBM937().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp939.java b/src/share/classes/sun/io/ByteToCharCp939.java index fae850bbc..4049984f4 100644 --- a/src/share/classes/sun/io/ByteToCharCp939.java +++ b/src/share/classes/sun/io/ByteToCharCp939.java @@ -24,32 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM939; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp939 to Unicode. 
-* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp939 - extends ByteToCharDBCS_EBCDIC +public class ByteToCharCp939 extends ByteToCharDBCS_EBCDIC { -{ - private final static IBM939 nioCoder = new IBM939(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp939"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp939"; - } - - public ByteToCharCp939() { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.singleByteToChar = nioCoder.getDecoderByteToCharMappings(); - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp939() { + super((DoubleByte.Decoder)new IBM939().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp942.java b/src/share/classes/sun/io/ByteToCharCp942.java index 8d7a3a1f7..dfaaee79c 100644 --- a/src/share/classes/sun/io/ByteToCharCp942.java +++ b/src/share/classes/sun/io/ByteToCharCp942.java @@ -24,105 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM942; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp942 to Unicode. 
-* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp942 - extends ByteToCharDBCS_ASCII +public class ByteToCharCp942 extends ByteToCharDBCS_ASCII { -{ - private static IBM942 nioCoder = new IBM942(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp942"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp942"; - } - - - private static final boolean leadByte[] = { - false, false, false, false, false, false, false, false, // 00 - 07 - false, false, false, false, false, false, false, false, // 08 - 0F - false, false, false, false, false, false, false, false, // 10 - 17 - false, false, false, false, false, false, false, false, // 18 - 1F - false, false, false, false, false, false, false, false, // 20 - 27 - false, false, false, false, false, false, false, false, // 28 - 2F - false, false, false, false, false, false, false, false, // 30 - 37 - false, false, false, false, false, false, false, false, // 38 - 3F - false, false, false, false, false, false, false, false, // 40 - 47 - false, false, false, false, false, false, false, false, // 48 - 4F - false, false, false, false, false, false, false, false, // 50 - 57 - false, false, false, false, false, false, false, false, // 58 - 5F - false, false, false, false, false, false, false, false, // 60 - 67 - false, false, false, false, false, false, false, false, // 68 - 6F - false, false, false, false, false, false, false, false, // 70 - 77 - false, false, false, false, false, false, false, false, // 78 - 7F - false, true, true, true, true, false, false, false, // 80 - 87 - true, true, true, true, true, true, true, true, // 88 - 8F - true, true, true, true, true, true, true, true, // 90 - 97 - true, true, true, true, true, true, true, true, // 98 - 9F - false, false, false, false, false, false, false, false, // A0 - A7 - false, false, false, false, false, false, false, false, // A8 - AF - false, false, false, false, 
false, false, false, false, // B0 - B7 - false, false, false, false, false, false, false, false, // B8 - BF - false, false, false, false, false, false, false, false, // C0 - C7 - false, false, false, false, false, false, false, false, // C8 - CF - false, false, false, false, false, false, false, false, // D0 - D7 - false, false, false, false, false, false, false, false, // D8 - DF - true, true, true, true, true, true, true, true, // E0 - E7 - true, true, true, false, false, false, false, false, // E8 - EF - true, true, true, true, true, true, true, true, // F0 - F7 - true, true, true, true, true, false, false, false, // F8 - FF - }; - - - private static final String singleByteToChar = - "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + - "\u0008\u0009\n\u000B\u000C\r\u000E\u000F" + - "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + - "\u0018\u0019\u001C\u001B\u007F\u001D\u001E\u001F" + - "\u0020\u0021\"\u0023\u0024\u0025\u0026\u0027" + - "\u0028\u0029\u002A\u002B\u002C\u002D\u002E\u002F" + - "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037" + - "\u0038\u0039\u003A\u003B\u003C\u003D\u003E\u003F" + - "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047" + - "\u0048\u0049\u004A\u004B\u004C\u004D\u004E\u004F" + - "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057" + - "\u0058\u0059\u005A\u005B\u00A5\u005D\u005E\u005F" + - "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067" + - "\u0068\u0069\u006A\u006B\u006C\u006D\u006E\u006F" + - "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077" + - "\u0078\u0079\u007A\u007B\u007C\u007D\u203E\u001A" + - "\u00A2\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\u00A3\uFF61\uFF62\uFF63\uFF64\uFF65\uFF66\uFF67" + - "\uFF68\uFF69\uFF6A\uFF6B\uFF6C\uFF6D\uFF6E\uFF6F" + - "\uFF70\uFF71\uFF72\uFF73\uFF74\uFF75\uFF76\uFF77" + - "\uFF78\uFF79\uFF7A\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F" 
+ - "\uFF80\uFF81\uFF82\uFF83\uFF84\uFF85\uFF86\uFF87" + - "\uFF88\uFF89\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E\uFF8F" + - "\uFF90\uFF91\uFF92\uFF93\uFF94\uFF95\uFF96\uFF97" + - "\uFF98\uFF99\uFF9A\uFF9B\uFF9C\uFF9D\uFF9E\uFF9F" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u00AC\\\u007E" - ; - - public ByteToCharCp942() { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.leadByte = this.leadByte; - super.singleByteToChar = this.singleByteToChar; - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp942() { + super((DoubleByte.Decoder)new IBM942().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp942C.java b/src/share/classes/sun/io/ByteToCharCp942C.java index 8589c8108..ce4ca3427 100644 --- a/src/share/classes/sun/io/ByteToCharCp942C.java +++ b/src/share/classes/sun/io/ByteToCharCp942C.java @@ -25,44 +25,16 @@ package sun.io; -import sun.io.*; +import sun.nio.cs.ext.*; public class ByteToCharCp942C extends ByteToCharDBCS_ASCII { - protected static final String singleByteToChar; - protected static final boolean leadByte[]; - protected static final short index1[]; - protected static final String index2; - protected static final int mask1; - protected static final int mask2; - protected static final int shift; - static { - ByteToCharDBCS_ASCII y = new ByteToCharCp942(); - mask1 = y.mask1; - mask2 = y.mask2; - shift = y.shift; - leadByte = y.leadByte; - index1 = y.index1; - index2 = y.index2; + // Return the character set id + public String getCharacterEncoding() { + return "Cp942C"; + } - /* Fix converter to pass through 0x00 to 0x7f unchanged to U+0000 to U+007F */ - String indexs = ""; - for (char c = '\0'; c < '\u0080'; ++c) indexs += c; - singleByteToChar = indexs + 
y.singleByteToChar.substring(indexs.length()); - } - - public String getCharacterEncoding() { - return "Cp942C"; - } - - ByteToCharCp942C() { - super(); - super.mask1 = mask1; - super.mask2 = mask2; - super.shift = shift; - super.leadByte = leadByte; - super.singleByteToChar = singleByteToChar; - super.index1 = index1; - super.index2 = index2; - } + public ByteToCharCp942C() { + super((DoubleByte.Decoder)new IBM942C().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp943.java b/src/share/classes/sun/io/ByteToCharCp943.java index 90070e8ca..7df9e0da7 100644 --- a/src/share/classes/sun/io/ByteToCharCp943.java +++ b/src/share/classes/sun/io/ByteToCharCp943.java @@ -22,102 +22,19 @@ * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. */ -// Table from Cp943 to Unicode -package sun.io; -import sun.nio.cs.ext.IBM943; +package sun.io; -/** - * Tables and data to convert Cp943 to Unicode - * - * @author BuildTable tool - */ +import sun.nio.cs.ext.*; public class ByteToCharCp943 extends ByteToCharDBCS_ASCII { - private static IBM943 nioCoder = new IBM943(); - + // Return the character set id public String getCharacterEncoding() { return "Cp943"; } public ByteToCharCp943() { - super(); - super.leadByte = this.leadByte; - super.singleByteToChar = this.singleByteToChar; - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; + super((DoubleByte.Decoder)new IBM943().newDecoder()); } - - private static final boolean leadByte[] = { - false, false, false, false, false, false, false, false, // 00 - 07 - false, false, false, false, false, false, false, false, // 08 - 0F - false, false, false, false, false, false, false, false, // 10 - 17 - false, false, false, false, false, false, false, false, // 18 - 1F - false, false, false, false, false, false, false, false, // 20 - 27 - false, false, false, false, false, false, 
false, false, // 28 - 2F - false, false, false, false, false, false, false, false, // 30 - 37 - false, false, false, false, false, false, false, false, // 38 - 3F - false, false, false, false, false, false, false, false, // 40 - 47 - false, false, false, false, false, false, false, false, // 48 - 4F - false, false, false, false, false, false, false, false, // 50 - 57 - false, false, false, false, false, false, false, false, // 58 - 5F - false, false, false, false, false, false, false, false, // 60 - 67 - false, false, false, false, false, false, false, false, // 68 - 6F - false, false, false, false, false, false, false, false, // 70 - 77 - false, false, false, false, false, false, false, false, // 78 - 7F - false, true, true, true, true, false, false, true, // 80 - 87 - true, true, true, true, true, true, true, true, // 88 - 8F - true, true, true, true, true, true, true, true, // 90 - 97 - true, true, true, true, true, true, true, true, // 98 - 9F - false, false, false, false, false, false, false, false, // A0 - A7 - false, false, false, false, false, false, false, false, // A8 - AF - false, false, false, false, false, false, false, false, // B0 - B7 - false, false, false, false, false, false, false, false, // B8 - BF - false, false, false, false, false, false, false, false, // C0 - C7 - false, false, false, false, false, false, false, false, // C8 - CF - false, false, false, false, false, false, false, false, // D0 - D7 - false, false, false, false, false, false, false, false, // D8 - DF - true, true, true, true, true, true, true, true, // E0 - E7 - true, true, true, true, true, true, true, false, // E8 - EF - true, true, true, true, true, true, true, true, // F0 - F7 - true, true, true, true, true, false, false, false, // F8 - FF - }; - static final String singleByteToChar = - "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007"+ // 0-7 - "\u0008\u0009\n\u000B\u000C\r\u000E\u000F"+ // 8-F - "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017"+ // 10-17 - 
"\u0018\u0019\u001C\u001B\u007F\u001D\u001E\u001F"+ // 18-1F - "\u0020\u0021\"\u0023\u0024\u0025\u0026\u0027"+ // 20-27 - "\u0028\u0029\u002A\u002B\u002C\u002D\u002E\u002F"+ // 28-2F - "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037"+ // 30-37 - "\u0038\u0039\u003A\u003B\u003C\u003D\u003E\u003F"+ // 38-3F - "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047"+ // 40-47 - "\u0048\u0049\u004A\u004B\u004C\u004D\u004E\u004F"+ // 48-4F - "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057"+ // 50-57 - "\u0058\u0059\u005A\u005B\u00A5\u005D\u005E\u005F"+ // 58-5F - "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067"+ // 60-67 - "\u0068\u0069\u006A\u006B\u006C\u006D\u006E\u006F"+ // 68-6F - "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077"+ // 70-77 - "\u0078\u0079\u007A\u007B\u007C\u007D\u203E\u001A"+ // 78-7F - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ // 80-87 - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ // 88-8F - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ // 90-97 - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ // 98-9F - "\uFFFD\uFF61\uFF62\uFF63\uFF64\uFF65\uFF66\uFF67"+ // A0-A7 - "\uFF68\uFF69\uFF6A\uFF6B\uFF6C\uFF6D\uFF6E\uFF6F"+ // A8-AF - "\uFF70\uFF71\uFF72\uFF73\uFF74\uFF75\uFF76\uFF77"+ // B0-B7 - "\uFF78\uFF79\uFF7A\uFF7B\uFF7C\uFF7D\uFF7E\uFF7F"+ // B8-BF - "\uFF80\uFF81\uFF82\uFF83\uFF84\uFF85\uFF86\uFF87"+ // C0-C7 - "\uFF88\uFF89\uFF8A\uFF8B\uFF8C\uFF8D\uFF8E\uFF8F"+ // C8-CF - "\uFF90\uFF91\uFF92\uFF93\uFF94\uFF95\uFF96\uFF97"+ // D0-D7 - "\uFF98\uFF99\uFF9A\uFF9B\uFF9C\uFF9D\uFF9E\uFF9F"+ // D8-DF - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ // E0-E7 - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ // E8-EF - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"+ // F0-F7 - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"; // F8-FF } diff --git a/src/share/classes/sun/io/ByteToCharCp943C.java b/src/share/classes/sun/io/ByteToCharCp943C.java index 1d7b58652..225963840 100644 --- 
a/src/share/classes/sun/io/ByteToCharCp943C.java +++ b/src/share/classes/sun/io/ByteToCharCp943C.java @@ -25,44 +25,16 @@ package sun.io; -import sun.io.*; +import sun.nio.cs.ext.*; public class ByteToCharCp943C extends ByteToCharDBCS_ASCII { - protected static final String singleByteToChar; - protected static final boolean leadByte[]; - protected static final short index1[]; - protected static final String index2; - protected static final int mask1; - protected static final int mask2; - protected static final int shift; - static { - ByteToCharDBCS_ASCII y = new ByteToCharCp943(); - mask1 = y.mask1; - mask2 = y.mask2; - shift = y.shift; - leadByte = y.leadByte; - index1 = y.index1; - index2 = y.index2; + // Return the character set id + public String getCharacterEncoding() { + return "Cp943C"; + } - /* Fix converter to pass through 0x00 to 0x7f unchanged to U+0000 to U+007F */ - String indexs = ""; - for (char c = '\0'; c < '\u0080'; ++c) indexs += c; - singleByteToChar = indexs + y.singleByteToChar.substring(indexs.length()); - } - - public String getCharacterEncoding() { - return "Cp943C"; - } - - ByteToCharCp943C() { - super(); - super.mask1 = mask1; - super.mask2 = mask2; - super.shift = shift; - super.leadByte = leadByte; - super.singleByteToChar = singleByteToChar; - super.index1 = index1; - super.index2 = index2; - } + public ByteToCharCp943C() { + super((DoubleByte.Decoder)new IBM943C().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp948.java b/src/share/classes/sun/io/ByteToCharCp948.java index f1f96d3d3..967ff64e6 100644 --- a/src/share/classes/sun/io/ByteToCharCp948.java +++ b/src/share/classes/sun/io/ByteToCharCp948.java @@ -24,105 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM948; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp948 to Unicode. 
-* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp948 - extends ByteToCharDBCS_ASCII +public class ByteToCharCp948 extends ByteToCharDBCS_ASCII { -{ - private static IBM948 nioCoder = new IBM948(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp948"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp948"; - } - - - private static final boolean leadByte[] = { - false, false, false, false, false, false, false, false, // 00 - 07 - false, false, false, false, false, false, false, false, // 08 - 0F - false, false, false, false, false, false, false, false, // 10 - 17 - false, false, false, false, false, false, false, false, // 18 - 1F - false, false, false, false, false, false, false, false, // 20 - 27 - false, false, false, false, false, false, false, false, // 28 - 2F - false, false, false, false, false, false, false, false, // 30 - 37 - false, false, false, false, false, false, false, false, // 38 - 3F - false, false, false, false, false, false, false, false, // 40 - 47 - false, false, false, false, false, false, false, false, // 48 - 4F - false, false, false, false, false, false, false, false, // 50 - 57 - false, false, false, false, false, false, false, false, // 58 - 5F - false, false, false, false, false, false, false, false, // 60 - 67 - false, false, false, false, false, false, false, false, // 68 - 6F - false, false, false, false, false, false, false, false, // 70 - 77 - false, false, false, false, false, false, false, false, // 78 - 7F - false, true, true, true, true, false, true, true, // 80 - 87 - true, true, true, true, true, true, true, true, // 88 - 8F - true, true, true, true, true, true, true, true, // 90 - 97 - true, true, true, true, true, true, true, true, // 98 - 9F - true, true, true, true, true, true, true, true, // A0 - A7 - true, true, true, true, true, true, true, true, // A8 - AF - true, true, true, true, true, true, true, true, 
// B0 - B7 - true, true, true, true, true, true, true, true, // B8 - BF - true, true, true, true, true, true, true, true, // C0 - C7 - true, true, true, true, true, true, true, true, // C8 - CF - true, true, false, false, false, false, false, false, // D0 - D7 - false, false, false, true, true, true, true, true, // D8 - DF - true, true, true, true, true, true, true, true, // E0 - E7 - true, true, true, true, true, true, true, true, // E8 - EF - true, true, true, true, true, true, true, true, // F0 - F7 - true, true, true, true, false, false, false, false, // F8 - FF - }; - - - private static final String singleByteToChar = - "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + - "\u0008\u0009\n\u000B\u000C\r\u000E\u000F" + - "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + - "\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + - "\u0020\u0021\"\u0023\u0024\u0025\u0026\u0027" + - "\u0028\u0029\u002A\u002B\u002C\u002D\u002E\u002F" + - "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037" + - "\u0038\u0039\u003A\u003B\u003C\u003D\u003E\u003F" + - "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047" + - "\u0048\u0049\u004A\u004B\u004C\u004D\u004E\u004F" + - "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057" + - "\u0058\u0059\u005A\u005B\\\u005D\u005E\u005F" + - "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067" + - "\u0068\u0069\u006A\u006B\u006C\u006D\u006E\u006F" + - "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077" + - "\u0078\u0079\u007A\u007B\u007C\u007D\u007E\u007F" + - "\u00A2\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - 
"\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u00AC\u00A6\uFFFD" - ; - - public ByteToCharCp948() { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.leadByte = this.leadByte; - super.singleByteToChar = this.singleByteToChar; - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp948() { + super((DoubleByte.Decoder)new IBM948().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp949.java b/src/share/classes/sun/io/ByteToCharCp949.java index 487eb432d..21ec5a4f4 100644 --- a/src/share/classes/sun/io/ByteToCharCp949.java +++ b/src/share/classes/sun/io/ByteToCharCp949.java @@ -24,70 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM949; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp949 to Unicode. 
-* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp949 - extends ByteToCharDBCS_ASCII +public class ByteToCharCp949 extends ByteToCharDBCS_ASCII { -{ - private final static IBM949 nioCoder = new IBM949(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp949"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp949"; - } - - - private static final boolean leadByte[] = { - false, false, false, false, false, false, false, false, // 00 - 07 - false, false, false, false, false, false, false, false, // 08 - 0F - false, false, false, false, false, false, false, false, // 10 - 17 - false, false, false, false, false, false, false, false, // 18 - 1F - false, false, false, false, false, false, false, false, // 20 - 27 - false, false, false, false, false, false, false, false, // 28 - 2F - false, false, false, false, false, false, false, false, // 30 - 37 - false, false, false, false, false, false, false, false, // 38 - 3F - false, false, false, false, false, false, false, false, // 40 - 47 - false, false, false, false, false, false, false, false, // 48 - 4F - false, false, false, false, false, false, false, false, // 50 - 57 - false, false, false, false, false, false, false, false, // 58 - 5F - false, false, false, false, false, false, false, false, // 60 - 67 - false, false, false, false, false, false, false, false, // 68 - 6F - false, false, false, false, false, false, false, false, // 70 - 77 - false, false, false, false, false, false, false, false, // 78 - 7F - false, false, false, false, false, false, false, false, // 80 - 87 - false, false, false, false, false, false, false, true, // 88 - 8F - true, true, true, true, true, true, true, true, // 90 - 97 - true, true, true, true, true, true, true, true, // 98 - 9F - true, true, true, true, true, true, true, true, // A0 - A7 - true, true, true, true, true, false, false, false, // A8 - AF - true, true, true, true, 
true, true, true, true, // B0 - B7 - true, true, true, true, true, true, true, true, // B8 - BF - true, true, true, true, true, true, true, true, // C0 - C7 - true, true, true, true, true, true, true, true, // C8 - CF - true, true, true, true, true, true, true, true, // D0 - D7 - true, true, true, true, true, true, true, true, // D8 - DF - true, true, true, true, true, true, true, true, // E0 - E7 - true, true, true, true, true, true, true, true, // E8 - EF - true, true, true, true, true, true, true, true, // F0 - F7 - true, true, true, true, true, true, true, false, // F8 - FF - }; - - - public ByteToCharCp949() { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.leadByte = this.leadByte; - super.singleByteToChar = nioCoder.getDecoderSingleByteMappings(); - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp949() { + super((DoubleByte.Decoder)new IBM949().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp949C.java b/src/share/classes/sun/io/ByteToCharCp949C.java index b6a477a61..ed0cda924 100644 --- a/src/share/classes/sun/io/ByteToCharCp949C.java +++ b/src/share/classes/sun/io/ByteToCharCp949C.java @@ -25,44 +25,16 @@ package sun.io; -import sun.io.*; +import sun.nio.cs.ext.*; public class ByteToCharCp949C extends ByteToCharDBCS_ASCII { - protected static final String singleByteToChar; - protected static final boolean leadByte[]; - protected static final short index1[]; - protected static final String index2; - protected static final int mask1; - protected static final int mask2; - protected static final int shift; - static { - ByteToCharDBCS_ASCII y = new ByteToCharCp949(); - mask1 = y.mask1; - mask2 = y.mask2; - shift = y.shift; - leadByte = y.leadByte; - index1 = y.index1; - index2 = y.index2; + // Return the character set id + public String getCharacterEncoding() { + return "Cp949C"; + } - /* Fix converter to pass through 0x00 to 0x7f 
unchanged to U+0000 to U+007F */ - String indexs = ""; - for (char c = '\0'; c < '\u0080'; ++c) indexs += c; - singleByteToChar = indexs + y.singleByteToChar.substring(indexs.length()); - } - - public String getCharacterEncoding() { - return "Cp949C"; - } - - ByteToCharCp949C() { - super(); - super.mask1 = mask1; - super.mask2 = mask2; - super.shift = shift; - super.leadByte = leadByte; - super.singleByteToChar = singleByteToChar; - super.index1 = index1; - super.index2 = index2; - } + public ByteToCharCp949C() { + super((DoubleByte.Decoder)new IBM949C().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp950.java b/src/share/classes/sun/io/ByteToCharCp950.java index cdbcd5be5..1e214efd5 100644 --- a/src/share/classes/sun/io/ByteToCharCp950.java +++ b/src/share/classes/sun/io/ByteToCharCp950.java @@ -24,105 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM950; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Cp950 to Unicode. -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp950 - extends ByteToCharDBCS_ASCII +public class ByteToCharCp950 extends ByteToCharDBCS_ASCII { -{ - private static IBM950 nioCoder = new IBM950(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp950"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp950"; - } - - - private static final boolean leadByte[] = { - false, false, false, false, false, false, false, false, // 00 - 07 - false, false, false, false, false, false, false, false, // 08 - 0F - false, false, false, false, false, false, false, false, // 10 - 17 - false, false, false, false, false, false, false, false, // 18 - 1F - false, false, false, false, false, false, false, false, // 20 - 27 - false, false, false, false, false, false, false, false, // 28 - 2F - false, false, false, false, false, false, false, false, // 30 - 37 - false, false, false, false, false, false, false, false, // 
38 - 3F - false, false, false, false, false, false, false, false, // 40 - 47 - false, false, false, false, false, false, false, false, // 48 - 4F - false, false, false, false, false, false, false, false, // 50 - 57 - false, false, false, false, false, false, false, false, // 58 - 5F - false, false, false, false, false, false, false, false, // 60 - 67 - false, false, false, false, false, false, false, false, // 68 - 6F - false, false, false, false, false, false, false, false, // 70 - 77 - false, false, false, false, false, false, false, false, // 78 - 7F - false, true, true, true, true, true, true, true, // 80 - 87 - true, true, true, true, true, true, true, true, // 88 - 8F - true, true, true, true, true, true, true, true, // 90 - 97 - true, true, true, true, true, true, true, true, // 98 - 9F - true, true, true, true, true, true, true, true, // A0 - A7 - true, true, true, true, true, true, true, true, // A8 - AF - true, true, true, true, true, true, true, true, // B0 - B7 - true, true, true, true, true, true, true, true, // B8 - BF - true, true, true, true, true, true, true, true, // C0 - C7 - true, true, true, true, true, true, true, true, // C8 - CF - true, true, true, true, true, true, true, true, // D0 - D7 - true, true, true, true, true, true, true, true, // D8 - DF - true, true, true, true, true, true, true, true, // E0 - E7 - true, true, true, true, true, true, true, true, // E8 - EF - true, true, true, true, true, true, true, true, // F0 - F7 - true, true, true, true, true, true, true, false, // F8 - FF - }; - - - private static final String singleByteToChar = - "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + - "\u0008\u0009\n\u000B\u000C\r\u000E\u000F" + - "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + - "\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + - "\u0020\u0021\"\u0023\u0024\u0025\u0026\u0027" + - "\u0028\u0029\u002A\u002B\u002C\u002D\u002E\u002F" + - "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037" + - 
"\u0038\u0039\u003A\u003B\u003C\u003D\u003E\u003F" + - "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047" + - "\u0048\u0049\u004A\u004B\u004C\u004D\u004E\u004F" + - "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057" + - "\u0058\u0059\u005A\u005B\\\u005D\u005E\u005F" + - "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067" + - "\u0068\u0069\u006A\u006B\u006C\u006D\u006E\u006F" + - "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077" + - "\u0078\u0079\u007A\u007B\u007C\u007D\u007E\u007F" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" + - "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD" - ; - - public ByteToCharCp950() { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.leadByte = this.leadByte; - super.singleByteToChar = this.singleByteToChar; - super.index1 = nioCoder.getDecoderIndex1(); - super.index2 = nioCoder.getDecoderIndex2(); - } + public ByteToCharCp950() { + super((DoubleByte.Decoder)new IBM950().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharCp970.java b/src/share/classes/sun/io/ByteToCharCp970.java index de2cb2c25..562d14a9b 100644 --- a/src/share/classes/sun/io/ByteToCharCp970.java +++ b/src/share/classes/sun/io/ByteToCharCp970.java @@ 
-24,30 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM970; +import sun.nio.cs.ext.*; -/** -* A table to convert Cp970 to Unicode -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class ByteToCharCp970 - extends ByteToCharEUC +public class ByteToCharCp970 extends ByteToCharEUC2 { -{ - private final static IBM970 nioCoder = new IBM970(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp970"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp970"; - } - - public ByteToCharCp970() - { - // Set the correct mapping table - super(); - super.byteToCharTable = nioCoder.getDecoderSingleByteMappings(); - super.mappingTableG1 = nioCoder.getDecoderMappingTableG1(); - } + public ByteToCharCp970() { + super((DoubleByte.Decoder)new IBM970().newDecoder()); + } } diff --git a/src/share/classes/sun/io/ByteToCharDBCS_ASCII.java b/src/share/classes/sun/io/ByteToCharDBCS_ASCII.java index 9a0ed24c1..34bcd7460 100644 --- a/src/share/classes/sun/io/ByteToCharDBCS_ASCII.java +++ b/src/share/classes/sun/io/ByteToCharDBCS_ASCII.java @@ -24,23 +24,28 @@ */ package sun.io; +import sun.nio.cs.ext.DoubleByte; +import static sun.nio.cs.CharsetMapping.*; + public abstract class ByteToCharDBCS_ASCII extends ByteToCharConverter { private boolean savedBytePresent; - private byte savedByte; - - protected String singleByteToChar; - protected boolean leadByte[]; - protected short index1[]; - protected String index2; - protected int mask1; - protected int mask2; - protected int shift; + private int savedByte; + private DoubleByte.Decoder dec; - public ByteToCharDBCS_ASCII() { + public ByteToCharDBCS_ASCII(DoubleByte.Decoder dec) { super(); savedBytePresent = false; + this.dec = dec; + } + + char decodeSingle(int b) { + return dec.decodeSingle(b); + } + + char decodeDouble(int b1, int b2) { + return dec.decodeDouble(b1, b2); } public int flush(char [] output, int outStart, int outEnd) @@ -66,18 +71,17 
@@ public abstract class ByteToCharDBCS_ASCII extends ByteToCharConverter ConversionBufferFullException { int inputSize; - char outputChar = '\uFFFD'; + char outputChar = UNMAPPABLE_DECODING; charOff = outOff; byteOff = inOff; while(byteOff < inEnd) { - int byte1, byte2; - int v; + int byte1; if (!savedBytePresent) { - byte1 = input[byteOff]; + byte1 = input[byteOff] & 0xff; inputSize = 1; } else { byte1 = savedByte; @@ -85,33 +89,21 @@ public abstract class ByteToCharDBCS_ASCII extends ByteToCharConverter inputSize = 0; } - if (byte1 < 0) - byte1 += 256; - - if (!leadByte[byte1]) - { - outputChar = singleByteToChar.charAt(byte1); - } else { + outputChar = decodeSingle(byte1); + if (outputChar == UNMAPPABLE_DECODING) { if (byteOff + inputSize >= inEnd) { - savedByte = (byte)byte1; + savedByte = byte1; savedBytePresent = true; byteOff += inputSize; break; } - byte2 = input[byteOff+inputSize]; - if (byte2 < 0) - byte2 += 256; - + outputChar = decodeDouble(byte1, input[byteOff+inputSize] & 0xff); inputSize++; - - // Lookup in the two level index - v = byte1 * 256 + byte2; - outputChar = index2.charAt(index1[((v & mask1) >> shift)] + (v & mask2)); } - if (outputChar == '\uFFFD') { + if (outputChar == UNMAPPABLE_DECODING) { if (subMode) outputChar = subChars[0]; else { diff --git a/src/share/classes/sun/io/ByteToCharDBCS_EBCDIC.java b/src/share/classes/sun/io/ByteToCharDBCS_EBCDIC.java index cb022b14a..d8d2596a1 100644 --- a/src/share/classes/sun/io/ByteToCharDBCS_EBCDIC.java +++ b/src/share/classes/sun/io/ByteToCharDBCS_EBCDIC.java @@ -24,6 +24,9 @@ */ package sun.io; +import sun.nio.cs.ext.DoubleByte; +import static sun.nio.cs.CharsetMapping.*; + public abstract class ByteToCharDBCS_EBCDIC extends ByteToCharConverter { @@ -35,20 +38,23 @@ public abstract class ByteToCharDBCS_EBCDIC extends ByteToCharConverter private int currentState; private boolean savedBytePresent; - private byte savedByte; - - protected String singleByteToChar; - protected short index1[]; - 
protected String index2; - protected int mask1; - protected int mask2; - protected int shift; + private int savedByte; + private DoubleByte.Decoder dec; - public ByteToCharDBCS_EBCDIC() { + public ByteToCharDBCS_EBCDIC(DoubleByte.Decoder dec) { super(); currentState = SBCS; savedBytePresent = false; + this.dec = dec; + } + + char decodeSingle(int b) { + return dec.decodeSingle(b); + } + + char decodeDouble(int b1, int b2) { + return dec.decodeDouble(b1, b2); } public int flush(char [] output, int outStart, int outEnd) @@ -74,17 +80,16 @@ public abstract class ByteToCharDBCS_EBCDIC extends ByteToCharConverter ConversionBufferFullException { int inputSize; - char outputChar = '\uFFFD'; + char outputChar = UNMAPPABLE_DECODING; charOff = outOff; byteOff = inOff; while(byteOff < inEnd) { int byte1, byte2; - int v; if (!savedBytePresent) { - byte1 = input[byteOff]; + byte1 = input[byteOff] & 0xff; inputSize = 1; } else { byte1 = savedByte; @@ -122,11 +127,8 @@ public abstract class ByteToCharDBCS_EBCDIC extends ByteToCharConverter // Process the real data characters - if (byte1 < 0) - byte1 += 256; - if (currentState == SBCS) { - outputChar = singleByteToChar.charAt(byte1); + outputChar = decodeSingle(byte1); } else { // for a DBCS character - architecture dictates the @@ -141,16 +143,13 @@ public abstract class ByteToCharDBCS_EBCDIC extends ByteToCharConverter // We have been split in the middle if a character // save the first byte for next time around - savedByte = (byte)byte1; + savedByte = byte1; savedBytePresent = true; byteOff += inputSize; break; } - byte2 = input[byteOff+inputSize]; - if (byte2 < 0) - byte2 += 256; - + byte2 = input[byteOff+inputSize] & 0xff; inputSize++; // validate the pair of bytes meet the architecture @@ -161,12 +160,10 @@ public abstract class ByteToCharDBCS_EBCDIC extends ByteToCharConverter throw new MalformedInputException(); } - // Lookup in the two level index - v = byte1 * 256 + byte2; - outputChar = index2.charAt(index1[((v & mask1) 
>> shift)] + (v & mask2)); + outputChar = decodeDouble(byte1, byte2); } - if (outputChar == '\uFFFD') { + if (outputChar == UNMAPPABLE_DECODING) { if (subMode) outputChar = subChars[0]; else { diff --git a/src/share/classes/sun/io/ByteToCharEUC2.java b/src/share/classes/sun/io/ByteToCharEUC2.java new file mode 100644 index 000000000..b9d78d721 --- /dev/null +++ b/src/share/classes/sun/io/ByteToCharEUC2.java @@ -0,0 +1,138 @@ +/* + * Copyright 1997 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ +package sun.io; + +import sun.nio.cs.ext.DoubleByte; +import static sun.nio.cs.CharsetMapping.*; + +public abstract class ByteToCharEUC2 extends ByteToCharConverter +{ + private final int G0 = 0; + private final int G1 = 1; + private final int SS2 = 0x8E; + private final int SS3 = 0x8F; + + private int firstByte, state; + + private DoubleByte.Decoder dec; + + public ByteToCharEUC2(DoubleByte.Decoder dec) { + super(); + state = G0; + this.dec = dec; + } + + char decodeSingle(int b) { + return dec.decodeSingle(b); + } + + char decodeDouble(int b1, int b2) { + return dec.decodeDouble(b1, b2); + } + + /** + * flush out any residual data and reset the buffer state + */ + public int flush(char[] output, int outStart, int outEnd) + throws MalformedInputException + { + if (state != G0) { + reset(); + badInputLength = 0; + throw new MalformedInputException(); + } + + reset(); + return 0; + } + + /** + * Resets the converter. + */ + public void reset() { + state = G0; + charOff = byteOff = 0; + } + + /** + * Character conversion + */ + public int convert(byte[] input, int inOff, int inEnd, + char[] output, int outOff, int outEnd) + throws UnknownCharacterException, MalformedInputException, + ConversionBufferFullException + { + int byte1; + char outputChar = UNMAPPABLE_DECODING; + byteOff = inOff; + charOff = outOff; + + while (byteOff < inEnd) { + byte1 = input[byteOff] & 0xff; + switch (state) { + case G0: + if (byte1 == SS2 || // no general support + byte1 == SS3 ) { // for g2 or g3 + badInputLength = 1; + throw new MalformedInputException(); + } + if ( byte1 <= 0x9f ) // < 0x9f has its own table + outputChar = decodeSingle(byte1); + else + if (byte1 < 0xa1 || byte1 > 0xfe) { // byte within range? 
+ badInputLength = 1; + throw new MalformedInputException(); + } else { // G1 set first byte + firstByte = byte1; + state = G1; + } + break; + case G1: + state = G0; + if ( byte1 < 0xa1 || byte1 > 0xfe) { // valid G1 set second byte + badInputLength = 1; + throw new MalformedInputException(); + } + outputChar = decodeDouble(firstByte, byte1); + break; + } + if (state == G0) { + if (outputChar == UNMAPPABLE_DECODING) { + if (subMode) + outputChar = subChars[0]; + else { + badInputLength = 1; + throw new UnknownCharacterException(); + } + } + if (charOff >= outEnd) + throw new ConversionBufferFullException(); + output[charOff++] = outputChar; + } + byteOff++; + } + return charOff - outOff; + } +} diff --git a/src/share/classes/sun/io/CharToByteCp1381.java b/src/share/classes/sun/io/CharToByteCp1381.java index ccbdd130e..5db3e195d 100644 --- a/src/share/classes/sun/io/CharToByteCp1381.java +++ b/src/share/classes/sun/io/CharToByteCp1381.java @@ -24,34 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM1381; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Unicode to Cp1381 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp1381 - extends CharToByteDBCS_ASCII +public class CharToByteCp1381 extends CharToByteDBCS_ASCII { -{ + // Return the character set id + public String getCharacterEncoding() { + return "Cp1381"; + } - private final static IBM1381 nioCoder = new IBM1381(); - - // Return the character set id - public String getCharacterEncoding() - { - return "Cp1381"; - } - - public CharToByteCp1381() - { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - } + public CharToByteCp1381() { + super((DoubleByte.Encoder)new IBM1381().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp1383.java 
b/src/share/classes/sun/io/CharToByteCp1383.java index 4ca950c1b..27c163659 100644 --- a/src/share/classes/sun/io/CharToByteCp1383.java +++ b/src/share/classes/sun/io/CharToByteCp1383.java @@ -24,34 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM1383; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Unicode to Cp1383 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp1383 - extends CharToByteDBCS_ASCII +public class CharToByteCp1383 extends CharToByteDBCS_ASCII { -{ + // Return the character set id + public String getCharacterEncoding() { + return "Cp1383"; + } - private final static IBM1383 nioCoder = new IBM1383(); - - // Return the character set id - public String getCharacterEncoding() - { - return "Cp1383"; - } - - public CharToByteCp1383() - { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - } + public CharToByteCp1383() { + super((DoubleByte.Encoder)new IBM1383().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp834.java b/src/share/classes/sun/io/CharToByteCp834.java index 322944873..49f214dfc 100644 --- a/src/share/classes/sun/io/CharToByteCp834.java +++ b/src/share/classes/sun/io/CharToByteCp834.java @@ -24,46 +24,17 @@ */ package sun.io; -import sun.nio.cs.ext.IBM933; +import sun.nio.cs.ext.*; //EBIDIC DBCSONLY Korean -public class CharToByteCp834 extends CharToByteCp933 -{ +public class CharToByteCp834 extends CharToByteDBCS_ASCII { + public CharToByteCp834() { - super(); + super((DoubleByte.Encoder)new IBM834().newEncoder()); subBytes = new byte[] {(byte)0xfe, (byte)0xfe}; } - protected boolean doSBCS() { - return false; - } - - protected int encodeHangul(char ch) { - int theBytes = super.encodeHangul(ch); - if (theBytes == -1) { - // Cp834 has 6 additional non-roundtrip char->bytes - // mappings, see#6379808 
- if (ch == '\u00b7') { - return 0x4143; - } else if (ch == '\u00ad') { - return 0x4148; - } else if (ch == '\u2015') { - return 0x4149; - } else if (ch == '\u223c') { - return 0x42a1; - } else if (ch == '\uff5e') { - return 0x4954; - } else if (ch == '\u2299') { - return 0x496f; - } - } else if (((theBytes & 0xff00)>>8) == 0) { - //SBCS, including 0 - return -1; - } - return theBytes; - } - public int getMaxBytesPerChar() { return 2; } diff --git a/src/share/classes/sun/io/CharToByteCp930.java b/src/share/classes/sun/io/CharToByteCp930.java index adc9ec066..5bc3fad83 100644 --- a/src/share/classes/sun/io/CharToByteCp930.java +++ b/src/share/classes/sun/io/CharToByteCp930.java @@ -24,36 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM930; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Unicode to Cp930 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp930 - extends CharToByteDBCS_EBCDIC +public class CharToByteCp930 extends CharToByteDBCS_EBCDIC { -{ - private final static IBM930 nioCoder = new IBM930(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp930"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp930"; - } - - - public CharToByteCp930() - { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - subBytes = new byte[1]; - subBytes[0] = 0x6f; - } + public CharToByteCp930() { + super((DoubleByte.Encoder)new IBM930().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp933.java b/src/share/classes/sun/io/CharToByteCp933.java index caa11ff27..f59c54b70 100644 --- a/src/share/classes/sun/io/CharToByteCp933.java +++ b/src/share/classes/sun/io/CharToByteCp933.java @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. 
+ * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,462 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM933; +import sun.nio.cs.ext.*; -/** -* @author Malcolm Ayres -*/ +public class CharToByteCp933 extends CharToByteDBCS_EBCDIC { -public class CharToByteCp933 extends CharToByteConverter -{ - private static final char SBase = '\uAC00'; - private static final char LBase = '\u1100'; - private static final char VBase = '\u1161'; - private static final char TBase = '\u11A7'; - private static final int VCount = 21; - private static final int TCount = 28; - private static final byte G0 = 0; - private static final byte G1 = 1; - private static final byte G2 = 2; - private static final byte G3 = 3; - private byte charState = G0; - private char l, v, t; - - private int byteState; - private byte[] outputByte; - private static final int SBCS = 0; - private static final int DBCS = 1; - private static final byte SO = 0x0e; - private static final byte SI = 0x0f; - - private char highHalfZoneCode; - - private short[] index1; - private String index2; - private String index2a; - private int mask1; - private int mask2; - private int shift; - - private final static IBM933 nioCoder = new IBM933(); - - - public CharToByteCp933() { - super(); - byteState = doSBCS()?SBCS:DBCS; - highHalfZoneCode = 0; - outputByte = new byte[2]; - mask1 = 0xFFF8; - mask2 = 0x0007; - shift = 3; - index1 = nioCoder.getEncoderIndex1(); - index2 = nioCoder.getEncoderIndex2(); - index2a = nioCoder.getEncoderIndex2a(); - subBytes = new byte[1]; - subBytes[0] = 0x6f; - } - - /** - * flush out any residual data and reset the buffer state - */ - public int flush(byte[] output, int outStart, int outEnd) - throws MalformedInputException, - ConversionBufferFullException - { - int bytesOut; - - byteOff = outStart; - - if (highHalfZoneCode != 0) { - 
reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - - if (charState != G0) { - try { - unicodeToBuffer(composeHangul() ,output, outEnd); - } - catch(UnknownCharacterException e) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - charState = G0; - } - - if (byteState == DBCS && doSBCS()) { - if (byteOff >= outEnd) - throw new ConversionBufferFullException(); - output[byteOff++] = SI; - byteState = SBCS; - } - - bytesOut = byteOff - outStart; - - reset(); - return bytesOut; - } - - /** - * Resets converter to its initial state. - */ - public void reset() { - byteState = doSBCS()?SBCS:DBCS; - highHalfZoneCode = 0; - charState = G0; - charOff = byteOff = 0; - } - - /** - * Returns true if the given character can be converted to the - * target character encoding. - */ - public boolean canConvert(char ch) { - return encodeHangul(ch) != -1; - } - - /** - * Sets the substitution bytes to use when the converter is in - * substitution mode. The given bytes should represent a valid - * character in the target character encoding. 
- */ - - public void setSubstitutionBytes( byte[] newSubBytes ) - throws IllegalArgumentException - { - if( newSubBytes.length > 2 || newSubBytes.length == 0) { - throw new IllegalArgumentException(); - } - - subBytes = new byte[ newSubBytes.length ]; - System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length ); - - } - - /** - * Character conversion - */ - - public int convert(char[] input, int inOff, int inEnd, - byte[] output, int outOff, int outEnd) - throws UnknownCharacterException, MalformedInputException, - ConversionBufferFullException - { - char inputChar; - int inputSize; - - charOff = inOff; - byteOff = outOff; - - while (charOff < inEnd) { - - if (highHalfZoneCode == 0) { - inputChar = input[charOff]; - inputSize = 1; - } else { - inputChar = highHalfZoneCode; - inputSize = 0; - highHalfZoneCode = 0; - } - - switch (charState) { - case G0: - - l = LBase; - v = VBase; - t = TBase; - - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = inputChar; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - break; - - case G1: - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = composeLL(l, inputChar); - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - break; - - case G2: - if ( isLeadingC(inputChar) ) { // Leading Consonant - - unicodeToBuffer(composeHangul(), output, outEnd); - - l = inputChar; - v = VBase; - t = TBase; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = composeVV(l, inputChar); - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - 
charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - - case G3: - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = composeTT(t, inputChar); - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - } - - if (charState != G0) - charOff++; - else { - - // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { - // Is this the last character of the input? - if (charOff + inputSize >= inEnd) { - highHalfZoneCode = inputChar; - charOff += inputSize; - break; - } - - // Is there a low surrogate following? - inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - // We have a valid surrogate pair. Too bad we don't do - // surrogates. Is substitution enabled? - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - - bytesToBuffer(outputByte, output, outEnd); - inputSize++; - } else { - badInputLength = 2; - throw new UnknownCharacterException(); - } - } else { - // We have a malformed surrogate pair - badInputLength = 1; - throw new MalformedInputException(); - } - } - - // Is this an unaccompanied low surrogate? 
- else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - unicodeToBuffer(inputChar, output, outEnd); - } - - charOff += inputSize; - - } - - } - - return byteOff - outOff; - - } - - private char composeHangul() { - int lIndex, vIndex, tIndex; - - lIndex = l - LBase; - vIndex = v - VBase; - tIndex = t - TBase; - - return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); - } - - private char composeLL(char l1, char l2) { - return l2; - } - - private char composeVV(char v1, char v2) { - return v2; - } - - private char composeTT(char t1, char t2) { - return t2; - } - - private boolean isLeadingC(char c) { - return (c >= LBase && c <= '\u1159'); - } - - private boolean isVowel(char c) { - return (c >= VBase && c <= '\u11a2'); - } - - private boolean isTrailingC(char c) { - return (c >= TBase && c <= '\u11f9'); - } - - /** - * returns the maximum number of bytes needed to convert a char - */ - public int getMaxBytesPerChar() { - return 4; - } - - /** - * Return the character set ID - */ + // Return the character set id public String getCharacterEncoding() { - return "Cp933"; + return "Cp933"; } - /** - * private function to add the bytes to the output buffer - */ - private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int spaceNeeded; - - // Set the output buffer into the correct state - - if (byteState == DBCS && theBytes[0] == 0x00) { - if (byteOff >= outEnd) - throw new ConversionBufferFullException(); - byteState = SBCS; - output[byteOff++] = SI; - } else - if (byteState == SBCS && theBytes[0] != 0x00) { - if (byteOff >= outEnd) - throw new ConversionBufferFullException(); - byteState = DBCS; - output[byteOff++] = SO; - } - - - // ensure sufficient space for the bytes(s) - - if (byteState == DBCS) - spaceNeeded = 2; - else - spaceNeeded = 1; - - if (byteOff + spaceNeeded > outEnd) - 
throw new ConversionBufferFullException(); - - // move the data into the buffer - - if (byteState == SBCS) - output[byteOff++] = theBytes[1]; - else { - output[byteOff++] = theBytes[0]; - output[byteOff++] = theBytes[1]; - } - } - - // return -1 for unmappable character - protected int encodeHangul(char unicode) { - int theBytes; - int index; - index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - // The input char is undefined if theBytes is 0 and the char is NOT unicode 0 - if (theBytes == 0 && unicode != '\u0000') - return -1; - return theBytes; - } - - /** - * private function to add a unicode character to the output buffer - */ - private void unicodeToBuffer(char unicode, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - // first we convert the unicode to its byte representation - int theBytes = encodeHangul(unicode); - - // if the unicode was not mappable - look for the substitution bytes - if (theBytes == -1) { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - } else { - badInputLength = 1; - throw new UnknownCharacterException(); - } - } else { - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - } - - // now put the bytes in the buffer - bytesToBuffer(outputByte, output, outEnd); - } - - //Methods below are for subclass Cp834 - protected boolean doSBCS() { - return true; + public CharToByteCp933() { + super((DoubleByte.Encoder)new IBM933().newEncoder()); } } diff --git a/src/share/classes/sun/io/CharToByteCp935.java b/src/share/classes/sun/io/CharToByteCp935.java index 3c5665299..c87d0e316 100644 --- a/src/share/classes/sun/io/CharToByteCp935.java +++ 
b/src/share/classes/sun/io/CharToByteCp935.java @@ -24,35 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM935; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Unicode to Cp935 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp935 - extends CharToByteDBCS_EBCDIC +public class CharToByteCp935 extends CharToByteDBCS_EBCDIC { -{ - private final static IBM935 nioCoder = new IBM935(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp935"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp935"; - } - - public CharToByteCp935() - { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - subBytes = new byte[1]; - subBytes[0] = 0x6f; - } + public CharToByteCp935() { + super((DoubleByte.Encoder)new IBM935().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp937.java b/src/share/classes/sun/io/CharToByteCp937.java index d872163ae..acdd782bb 100644 --- a/src/share/classes/sun/io/CharToByteCp937.java +++ b/src/share/classes/sun/io/CharToByteCp937.java @@ -24,39 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM937; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Unicode to Cp937 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp937 - extends CharToByteDBCS_EBCDIC +public class CharToByteCp937 extends CharToByteDBCS_EBCDIC { -{ + // Return the character set id + public String getCharacterEncoding() { + return "Cp937"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp937"; - } - - private short index1[]; - private String index2; - private String index2a; - private static final IBM937 nioCoder = new IBM937(); - - public CharToByteCp937() - { - super(); - super.mask1 = 
0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - subBytes = new byte[1]; - subBytes[0] = 0x6f; - } + public CharToByteCp937() { + super((DoubleByte.Encoder)new IBM937().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp939.java b/src/share/classes/sun/io/CharToByteCp939.java index 00929d1bf..4a5814a7c 100644 --- a/src/share/classes/sun/io/CharToByteCp939.java +++ b/src/share/classes/sun/io/CharToByteCp939.java @@ -24,36 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM939; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Unicode to Cp939 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp939 - extends CharToByteDBCS_EBCDIC +public class CharToByteCp939 extends CharToByteDBCS_EBCDIC { -{ + // Return the character set id + public String getCharacterEncoding() { + return "Cp939"; + } - private final static IBM939 nioCoder = new IBM939(); - - // Return the character set id - public String getCharacterEncoding() - { - return "Cp939"; - } - - public CharToByteCp939() - { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - subBytes = new byte[1]; - subBytes[0] = 0x6f; - } + public CharToByteCp939() { + super((DoubleByte.Encoder)new IBM939().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp942.java b/src/share/classes/sun/io/CharToByteCp942.java index 1d5224d9f..21b9a7677 100644 --- a/src/share/classes/sun/io/CharToByteCp942.java +++ b/src/share/classes/sun/io/CharToByteCp942.java @@ -24,33 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM942; +import sun.nio.cs.ext.*; -/** -* Tables and data to convert Unicode to Cp942 -* -* @author Malcolm Ayres, assisted by UniMap 
program -*/ -public class CharToByteCp942 - extends CharToByteDBCS_ASCII +public class CharToByteCp942 extends CharToByteDBCS_ASCII { -{ - private static IBM942 nioCoder = new IBM942(); + // Return the character set id + public String getCharacterEncoding() { + return "Cp942"; + } - // Return the character set id - public String getCharacterEncoding() - { - return "Cp942"; - } - - public CharToByteCp942() - { - super(); - super.mask1 = 0xFFE0; - super.mask2 = 0x001F; - super.shift = 5; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - } + public CharToByteCp942() { + super((DoubleByte.Encoder)new IBM942().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp942C.java b/src/share/classes/sun/io/CharToByteCp942C.java index 65b362f5f..61b40d423 100644 --- a/src/share/classes/sun/io/CharToByteCp942C.java +++ b/src/share/classes/sun/io/CharToByteCp942C.java @@ -25,47 +25,16 @@ package sun.io; -import sun.io.*; +import sun.nio.cs.ext.*; public class CharToByteCp942C extends CharToByteDBCS_ASCII { - protected static final int mask1; - protected static final int mask2; - protected static final int shift; - protected static final short index1[]; - protected static final String index2; - protected static final String index2a; - static { - CharToByteDBCS_ASCII y = new CharToByteCp942(); - mask1 = y.mask1; - mask2 = y.mask2; - shift = y.shift; - index2 = y.index2; + // Return the character set id + public String getCharacterEncoding() { + return "Cp942C"; + } - /* Fix converter to pass through U+0000 to U+007F unchanged to 0x00 to 0x7f */ - String indexs = ""; - for (char c = '\0'; c < '\u0080'; ++c) indexs += c; - index2a = y.index2a + indexs; - - int o = y.index2a.length() + 15000; - index1 = new short[y.index1.length]; - System.arraycopy(y.index1, 0, index1, 0, y.index1.length); - for (int i = 0; i * (1<<shift) < 128; ++i) { - index1[i] = (short)(o + i * 
(1<<shift)); - } - } - - public String getCharacterEncoding() { - return "Cp942C"; - } - - CharToByteCp942C() { - super(); - super.mask1 = mask1; - super.mask2 = mask2; - super.shift = shift; - super.index1 = index1; - super.index2 = index2; - super.index2a = index2a; - } + public CharToByteCp942C() { + super((DoubleByte.Encoder)new IBM942C().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp943.java b/src/share/classes/sun/io/CharToByteCp943.java index 56b87ab8d..2f4602f6c 100644 --- a/src/share/classes/sun/io/CharToByteCp943.java +++ b/src/share/classes/sun/io/CharToByteCp943.java @@ -22,34 +22,19 @@ * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. */ -// Table from Unicode to Cp943 + package sun.io; -import sun.nio.cs.ext.IBM943; - -/** - * Tables and data to convert Unicode to Cp943 - * - * @author BuildTables tool - */ +import sun.nio.cs.ext.*; public class CharToByteCp943 extends CharToByteDBCS_ASCII { - private static IBM943 nioCoder = new IBM943(); - + // Return the character set id public String getCharacterEncoding() { return "Cp943"; } public CharToByteCp943() { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - subBytes = new byte[1]; - subBytes[0] = 0x6f; + super((DoubleByte.Encoder)new IBM943().newEncoder()); } } diff --git a/src/share/classes/sun/io/CharToByteCp943C.java b/src/share/classes/sun/io/CharToByteCp943C.java index 9ed1f7f9a..14fadb798 100644 --- a/src/share/classes/sun/io/CharToByteCp943C.java +++ b/src/share/classes/sun/io/CharToByteCp943C.java @@ -25,47 +25,16 @@ package sun.io; -import sun.io.*; +import sun.nio.cs.ext.*; public class CharToByteCp943C extends CharToByteDBCS_ASCII { - protected static final int mask1; - protected static final int mask2; - protected static final int shift; - protected 
static final short index1[]; - protected static final String index2; - protected static final String index2a; - static { - CharToByteDBCS_ASCII y = new CharToByteCp943(); - mask1 = y.mask1; - mask2 = y.mask2; - shift = y.shift; - index2 = y.index2; + // Return the character set id + public String getCharacterEncoding() { + return "Cp943C"; + } - /* Fix converter to pass through U+0000 to U+007F unchanged to 0x00 to 0x7f */ - String indexs = ""; - for (char c = '\0'; c < '\u0080'; ++c) indexs += c; - index2a = y.index2a + indexs; - - int o = y.index2a.length() + 15000; - index1 = new short[y.index1.length]; - System.arraycopy(y.index1, 0, index1, 0, y.index1.length); - for (int i = 0; i * (1<<shift) < 128; ++i) { - index1[i] = (short)(o + i * (1<<shift)); - } - } - - public String getCharacterEncoding() { - return "Cp943C"; - } - - CharToByteCp943C() { - super(); - super.mask1 = mask1; - super.mask2 = mask2; - super.shift = shift; - super.index1 = index1; - super.index2 = index2; - super.index2a = index2a; - } + public CharToByteCp943C() { + super((DoubleByte.Encoder)new IBM943C().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp948.java b/src/share/classes/sun/io/CharToByteCp948.java index b6c9d48e8..32ef80605 100644 --- a/src/share/classes/sun/io/CharToByteCp948.java +++ b/src/share/classes/sun/io/CharToByteCp948.java @@ -22,36 +22,19 @@ * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. 
*/ -package sun.io; - -import sun.nio.cs.ext.IBM948; -/** -* Tables and data to convert Unicode to Cp948 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp948 - extends CharToByteDBCS_ASCII +package sun.io; -{ - private static IBM948 nioCoder = new IBM948(); +import sun.nio.cs.ext.*; - // Return the character set id - public String getCharacterEncoding() - { - return "Cp948"; - } +public class CharToByteCp948 extends CharToByteDBCS_ASCII { + // Return the character set id + public String getCharacterEncoding() { + return "Cp948"; + } - public CharToByteCp948() - { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - } + public CharToByteCp948() { + super((DoubleByte.Encoder)new IBM948().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp949.java b/src/share/classes/sun/io/CharToByteCp949.java index 90ca68532..5883f83d0 100644 --- a/src/share/classes/sun/io/CharToByteCp949.java +++ b/src/share/classes/sun/io/CharToByteCp949.java @@ -22,418 +22,19 @@ * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. 
*/ -package sun.io; - -import sun.nio.cs.ext.IBM949; - -/** -* @author Malcolm Ayres -*/ - -public class CharToByteCp949 extends CharToByteConverter -{ - private static final char SBase = '\uAC00'; - private static final char LBase = '\u1100'; - private static final char VBase = '\u1161'; - private static final char TBase = '\u11A7'; - private static final int VCount = 21; - private static final int TCount = 28; - private static final byte G0 = 0; - private static final byte G1 = 1; - private static final byte G2 = 2; - private static final byte G3 = 3; - private byte charState = G0; - private char l, v, t; - - private byte[] outputByte; - - private char highHalfZoneCode; - private int mask1; - private int mask2; - private int shift; - private short[] index1; - private String index2; - private String index2a; - - private final static IBM949 nioCoder = new IBM949(); - - public CharToByteCp949() { - super(); - highHalfZoneCode = 0; - outputByte = new byte[2]; - mask1 = 0xFFF8; - mask2 = 0x0007; - shift = 3; - index1 = nioCoder.getEncoderIndex1(); - index2 = nioCoder.getEncoderIndex2(); - index2a = nioCoder.getEncoderIndex2a(); - } - - /** - * flush out any residual data and reset the buffer state - */ - public int flush(byte[] output, int outStart, int outEnd) - throws MalformedInputException, - ConversionBufferFullException - { - int bytesOut; - - byteOff = outStart; - - if (highHalfZoneCode != 0) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - - if (charState != G0) { - try { - unicodeToBuffer(composeHangul() ,output, outEnd); - } - catch(UnknownCharacterException e) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - charState = G0; - } - - bytesOut = byteOff - outStart; - - reset(); - return bytesOut; - } - - /** - * Resets converter to its initial state. 
- */ - public void reset() { - highHalfZoneCode = 0; - charState = G0; - charOff = byteOff = 0; - } - - /** - * Returns true if the given character can be converted to the - * target character encoding. - */ - public boolean canConvert(char ch) { - int index; - int theBytes; - - index = index1[((ch & mask1) >> shift)] + (ch & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - if (theBytes != 0) - return (true); - - // only return true if input char was unicode null - all others are - // undefined - return( ch == '\u0000'); - } - - /** - * Character conversion - */ - - public int convert(char[] input, int inOff, int inEnd, - byte[] output, int outOff, int outEnd) - throws UnknownCharacterException, MalformedInputException, - ConversionBufferFullException - { - char inputChar; - int inputSize; - - charOff = inOff; - byteOff = outOff; - - while (charOff < inEnd) { - - if (highHalfZoneCode == 0) { - inputChar = input[charOff]; - inputSize = 1; - } else { - inputChar = highHalfZoneCode; - inputSize = 0; - highHalfZoneCode = 0; - } - - switch (charState) { - case G0: - - l = LBase; - v = VBase; - t = TBase; - - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = inputChar; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - break; - - case G1: - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = composeLL(l, inputChar); - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - break; - - case G2: - if ( isLeadingC(inputChar) ) { // Leading Consonant - - 
unicodeToBuffer(composeHangul(), output, outEnd); - - l = inputChar; - v = VBase; - t = TBase; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = composeVV(l, inputChar); - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - - case G3: - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = composeTT(t, inputChar); - charState = G3; - break; - } - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - } - - if (charState != G0) - charOff++; - else { - - // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { - // Is this the last character of the input? - if (charOff + inputSize >= inEnd) { - highHalfZoneCode = inputChar; - charOff += inputSize; - break; - } - - // Is there a low surrogate following? - inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - // We have a valid surrogate pair. Too bad we don't do - // surrogates. Is substitution enabled? - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - - bytesToBuffer(outputByte, output, outEnd); - inputSize++; - } else { - badInputLength = 2; - throw new UnknownCharacterException(); - } - } else { - // We have a malformed surrogate pair - badInputLength = 1; - throw new MalformedInputException(); - } - } - - // Is this an unaccompanied low surrogate? 
- else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - unicodeToBuffer(inputChar, output, outEnd); - } - - charOff += inputSize; - - } - - } - - return byteOff - outOff; - - } - - private char composeHangul() { - int lIndex, vIndex, tIndex; - - lIndex = l - LBase; - vIndex = v - VBase; - tIndex = t - TBase; - - return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); - } - - private char composeLL(char l1, char l2) { - return l2; - } - - private char composeVV(char v1, char v2) { - return v2; - } - - private char composeTT(char t1, char t2) { - return t2; - } - - private boolean isLeadingC(char c) { - return (c >= LBase && c <= '\u1159'); - } - - private boolean isVowel(char c) { - return (c >= VBase && c <= '\u11a2'); - } - - private boolean isTrailingC(char c) { - return (c >= TBase && c <= '\u11f9'); - } +package sun.io; - /** - * returns the maximum number of bytes needed to convert a char - */ - public int getMaxBytesPerChar() { - return 2; - } +import sun.nio.cs.ext.*; +public class CharToByteCp949 extends CharToByteDBCS_ASCII { - /** - * Return the character set ID - */ + // Return the character set id public String getCharacterEncoding() { - return "Cp949"; - } - - /** - * private function to add the bytes to the output buffer - */ - private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int spaceNeeded; - - // ensure sufficient space for the bytes(s) - - if (theBytes[0] == 0x00) - spaceNeeded = 1; - else - spaceNeeded = 2; - - if (byteOff + spaceNeeded > outEnd) - throw new ConversionBufferFullException(); - - // move the data into the buffer - - if (spaceNeeded == 1) - output[byteOff++] = theBytes[1]; - else { - output[byteOff++] = theBytes[0]; - output[byteOff++] = theBytes[1]; - } - + return "Cp949"; } - /** - * private function to add a unicode character to the output 
buffer - */ - private void unicodeToBuffer(char unicode, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int index; - int theBytes; - - // first we convert the unicode to its byte representation - - index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - - // if the unicode was not mappable - look for the substitution bytes - - if (outputByte[0] == 0x00 && outputByte[1] == 0x00 - && unicode != '\u0000') { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - } else { - badInputLength = 1; - throw new UnknownCharacterException(); - } - } - - // now put the bytes in the buffer - - bytesToBuffer(outputByte, output, outEnd); - + public CharToByteCp949() { + super((DoubleByte.Encoder)new IBM949().newEncoder()); } } diff --git a/src/share/classes/sun/io/CharToByteCp949C.java b/src/share/classes/sun/io/CharToByteCp949C.java index c236d21d0..0f0e8aced 100644 --- a/src/share/classes/sun/io/CharToByteCp949C.java +++ b/src/share/classes/sun/io/CharToByteCp949C.java @@ -1,5 +1,5 @@ /* - * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,421 +25,16 @@ package sun.io; -import sun.nio.cs.ext.IBM949C; +import sun.nio.cs.ext.*; -/** -* @author Malcolm Ayres -*/ +public class CharToByteCp949C extends CharToByteDBCS_ASCII { -/* -Cp949C is a hand-modified version of Cp949 -maps Unicode U-005C <-> 0x5C (local code page) - */ - -public class CharToByteCp949C extends CharToByteConverter -{ - private static final char SBase = '\uAC00'; - private static final char LBase = '\u1100'; - private static final char VBase = '\u1161'; - private static final char TBase = '\u11A7'; - private static final int VCount = 21; - private static final int TCount = 28; - private static final byte G0 = 0; - private static final byte G1 = 1; - private static final byte G2 = 2; - private static final byte G3 = 3; - private byte charState = G0; - private char l, v, t; - - private byte[] outputByte; - - private char highHalfZoneCode; - private int mask1; - private int mask2; - private int shift; - private short[] index1; - private String index2; - private String index2a; - - private final static IBM949C nioCoder = new IBM949C(); - - public CharToByteCp949C() { - super(); - index1 = nioCoder.getEncoderIndex1(); - index2 = nioCoder.getEncoderIndex2(); - index2a = nioCoder.getEncoderIndex2a(); - highHalfZoneCode = 0; - outputByte = new byte[2]; - mask1 = 0xFFF8; - mask2 = 0x0007; - shift = 3; - } - - /** - * flush out any residual data and reset the buffer state - */ - public int flush(byte[] output, int outStart, int outEnd) - throws MalformedInputException, - ConversionBufferFullException - { - int bytesOut; - - byteOff = outStart; - - if (highHalfZoneCode != 0) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - - if (charState != G0) { - try { - unicodeToBuffer(composeHangul() ,output, outEnd); - } - catch(UnknownCharacterException e) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - charState = 
G0; - } - - bytesOut = byteOff - outStart; - - reset(); - return bytesOut; - } - - /** - * Resets converter to its initial state. - */ - public void reset() { - highHalfZoneCode = 0; - charState = G0; - charOff = byteOff = 0; - } - - /** - * Returns true if the given character can be converted to the - * target character encoding. - */ - public boolean canConvert(char ch) { - int index; - int theBytes; - - index = index1[((ch & mask1) >> shift)] + (ch & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - if (theBytes != 0) - return (true); - - // only return true if input char was unicode null - all others are - // undefined - return( ch == '\u0000'); - } - - /** - * Character conversion - */ - - public int convert(char[] input, int inOff, int inEnd, - byte[] output, int outOff, int outEnd) - throws UnknownCharacterException, MalformedInputException, - ConversionBufferFullException - { - char inputChar; - int inputSize; - - charOff = inOff; - byteOff = outOff; - - while (charOff < inEnd) { - - if (highHalfZoneCode == 0) { - inputChar = input[charOff]; - inputSize = 1; - } else { - inputChar = highHalfZoneCode; - inputSize = 0; - highHalfZoneCode = 0; - } - - switch (charState) { - case G0: - - l = LBase; - v = VBase; - t = TBase; - - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = inputChar; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - break; - - case G1: - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = composeLL(l, inputChar); - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); 
- - charState = G0; - break; - - case G2: - if ( isLeadingC(inputChar) ) { // Leading Consonant - - unicodeToBuffer(composeHangul(), output, outEnd); - - l = inputChar; - v = VBase; - t = TBase; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = composeVV(l, inputChar); - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - - case G3: - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = composeTT(t, inputChar); - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - } - - if (charState != G0) - charOff++; - else { - - // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { - // Is this the last character of the input? - if (charOff + inputSize >= inEnd) { - highHalfZoneCode = inputChar; - charOff += inputSize; - break; - } - - // Is there a low surrogate following? - inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - // We have a valid surrogate pair. Too bad we don't do - // surrogates. Is substitution enabled? - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - - bytesToBuffer(outputByte, output, outEnd); - inputSize++; - } else { - badInputLength = 2; - throw new UnknownCharacterException(); - } - } else { - // We have a malformed surrogate pair - badInputLength = 1; - throw new MalformedInputException(); - } - } - - // Is this an unaccompanied low surrogate? 
- else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - unicodeToBuffer(inputChar, output, outEnd); - } - - charOff += inputSize; - - } - - } - - return byteOff - outOff; - - } - - private char composeHangul() { - int lIndex, vIndex, tIndex; - - lIndex = l - LBase; - vIndex = v - VBase; - tIndex = t - TBase; - - return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); - } - - private char composeLL(char l1, char l2) { - return l2; - } - - private char composeVV(char v1, char v2) { - return v2; - } - - private char composeTT(char t1, char t2) { - return t2; - } - - private boolean isLeadingC(char c) { - return (c >= LBase && c <= '\u1159'); - } - - private boolean isVowel(char c) { - return (c >= VBase && c <= '\u11a2'); - } - - private boolean isTrailingC(char c) { - return (c >= TBase && c <= '\u11f9'); - } - - /** - * returns the maximum number of bytes needed to convert a char - */ - public int getMaxBytesPerChar() { - return 2; - } - - - /** - * Return the character set ID - */ + // Return the character set id public String getCharacterEncoding() { - return "Cp949C"; + return "Cp949C"; } - /** - * private function to add the bytes to the output buffer - */ - private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int spaceNeeded; - - // ensure sufficient space for the bytes(s) - - if (theBytes[0] == 0x00) - spaceNeeded = 1; - else - spaceNeeded = 2; - - if (byteOff + spaceNeeded > outEnd) - throw new ConversionBufferFullException(); - - // move the data into the buffer - - if (spaceNeeded == 1) - output[byteOff++] = theBytes[1]; - else { - output[byteOff++] = theBytes[0]; - output[byteOff++] = theBytes[1]; - } - - } - - /** - * private function to add a unicode character to the output buffer - */ - private void unicodeToBuffer(char unicode, byte[] output, int outEnd) - throws 
ConversionBufferFullException, - UnknownCharacterException { - - int index; - int theBytes; - - // first we convert the unicode to its byte representation - - index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - - // if the unicode was not mappable - look for the substitution bytes - - if (outputByte[0] == 0x00 && outputByte[1] == 0x00 - && unicode != '\u0000') { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - } else { - badInputLength = 1; - throw new UnknownCharacterException(); - } - } - - // now put the bytes in the buffer - - bytesToBuffer(outputByte, output, outEnd); - + public CharToByteCp949C() { + super((DoubleByte.Encoder)new IBM949C().newEncoder()); } } diff --git a/src/share/classes/sun/io/CharToByteCp950.java b/src/share/classes/sun/io/CharToByteCp950.java index d7fa75415..5ac847e85 100644 --- a/src/share/classes/sun/io/CharToByteCp950.java +++ b/src/share/classes/sun/io/CharToByteCp950.java @@ -22,35 +22,19 @@ * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. 
*/ -package sun.io; -import sun.nio.cs.ext.IBM950; +package sun.io; -/** -* Tables and data to convert Unicode to Cp950 -* -* @author Malcolm Ayres, assisted by UniMap program -*/ -public class CharToByteCp950 - extends CharToByteDBCS_ASCII +import sun.nio.cs.ext.*; -{ - private static IBM950 nioCoder = new IBM950(); +public class CharToByteCp950 extends CharToByteDBCS_ASCII { - // Return the character set id - public String getCharacterEncoding() - { - return "Cp950"; - } + // Return the character set id + public String getCharacterEncoding() { + return "Cp950"; + } - public CharToByteCp950() - { - super(); - super.mask1 = 0xFFC0; - super.mask2 = 0x003F; - super.shift = 6; - super.index1 = nioCoder.getEncoderIndex1(); - super.index2 = nioCoder.getEncoderIndex2(); - super.index2a = nioCoder.getEncoderIndex2a(); - } + public CharToByteCp950() { + super((DoubleByte.Encoder)new IBM950().newEncoder()); + } } diff --git a/src/share/classes/sun/io/CharToByteCp970.java b/src/share/classes/sun/io/CharToByteCp970.java index 7cbadd428..6ebcb2df3 100644 --- a/src/share/classes/sun/io/CharToByteCp970.java +++ b/src/share/classes/sun/io/CharToByteCp970.java @@ -24,419 +24,17 @@ */ package sun.io; -import sun.nio.cs.ext.IBM970; +import sun.nio.cs.ext.*; -/** -* @author Malcolm Ayres -*/ +// EUC_Simple is the same as DBCS_ASCII +public class CharToByteCp970 extends CharToByteDBCS_ASCII { -public class CharToByteCp970 extends CharToByteConverter -{ - private static final char SBase = '\uAC00'; - private static final char LBase = '\u1100'; - private static final char VBase = '\u1161'; - private static final char TBase = '\u11A7'; - private static final int VCount = 21; - private static final int TCount = 28; - private static final byte G0 = 0; - private static final byte G1 = 1; - private static final byte G2 = 2; - private static final byte G3 = 3; - private byte charState = G0; - private char l, v, t; - - private byte[] outputByte; - - private char highHalfZoneCode; - private int 
mask1; - private int mask2; - private int shift; - - private short[] index1; - private String index2; - private String index2a; - - private final static IBM970 nioCoder = new IBM970(); - - public CharToByteCp970() { - super(); - highHalfZoneCode = 0; - outputByte = new byte[2]; - mask1 = 0xFFF8; - mask2 = 0x0007; - shift = 3; - index1 = nioCoder.getEncoderIndex1(); - index2 = nioCoder.getEncoderIndex2(); - index2a = nioCoder.getEncoderIndex2a(); - } - - /** - * flush out any residual data and reset the buffer state - */ - public int flush(byte[] output, int outStart, int outEnd) - throws MalformedInputException, - ConversionBufferFullException - { - int bytesOut; - - byteOff = outStart; - - if (highHalfZoneCode != 0) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - - if (charState != G0) { - try { - unicodeToBuffer(composeHangul() ,output, outEnd); - } - catch(UnknownCharacterException e) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - charState = G0; - } - - bytesOut = byteOff - outStart; - - reset(); - return bytesOut; - } - - /** - * Resets converter to its initial state. - */ - public void reset() { - highHalfZoneCode = 0; - charState = G0; - charOff = byteOff = 0; - } - - /** - * Returns true if the given character can be converted to the - * target character encoding. 
- */ - public boolean canConvert(char ch) { - int index; - int theBytes; - - index = index1[((ch & mask1) >> shift)] + (ch & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - if (theBytes != 0) - return (true); - - // only return true if input char was unicode null - all others are - // undefined - return( ch == '\u0000'); - } - - /** - * Character conversion - */ - - public int convert(char[] input, int inOff, int inEnd, - byte[] output, int outOff, int outEnd) - throws UnknownCharacterException, MalformedInputException, - ConversionBufferFullException - { - char inputChar; - int inputSize; - - charOff = inOff; - byteOff = outOff; - - while (charOff < inEnd) { - - if (highHalfZoneCode == 0) { - inputChar = input[charOff]; - inputSize = 1; - } else { - inputChar = highHalfZoneCode; - inputSize = 0; - highHalfZoneCode = 0; - } - - switch (charState) { - case G0: - - l = LBase; - v = VBase; - t = TBase; - - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = inputChar; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - break; - - case G1: - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = composeLL(l, inputChar); - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - break; - - case G2: - if ( isLeadingC(inputChar) ) { // Leading Consonant - - unicodeToBuffer(composeHangul(), output, outEnd); - - l = inputChar; - v = VBase; - t = TBase; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = composeVV(l, inputChar); - charState = G2; - break; - } - - 
if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - - case G3: - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = composeTT(t, inputChar); - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - } - - if (charState != G0) - charOff++; - else { - - // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { - // Is this the last character of the input? - if (charOff + inputSize >= inEnd) { - highHalfZoneCode = inputChar; - charOff += inputSize; - break; - } - - // Is there a low surrogate following? - inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - // We have a valid surrogate pair. Too bad we don't do - // surrogates. Is substitution enabled? - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - - bytesToBuffer(outputByte, output, outEnd); - inputSize++; - } else { - badInputLength = 2; - throw new UnknownCharacterException(); - } - } else { - // We have a malformed surrogate pair - badInputLength = 1; - throw new MalformedInputException(); - } - } - - // Is this an unaccompanied low surrogate? 
- else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - unicodeToBuffer(inputChar, output, outEnd); - } - - charOff += inputSize; - - } - - } - - return byteOff - outOff; - - } - - private char composeHangul() { - int lIndex, vIndex, tIndex; - - lIndex = l - LBase; - vIndex = v - VBase; - tIndex = t - TBase; - - return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); - } - - private char composeLL(char l1, char l2) { - return l2; - } - - private char composeVV(char v1, char v2) { - return v2; - } - - private char composeTT(char t1, char t2) { - return t2; - } - - private boolean isLeadingC(char c) { - return (c >= LBase && c <= '\u1159'); - } - - private boolean isVowel(char c) { - return (c >= VBase && c <= '\u11a2'); - } - - private boolean isTrailingC(char c) { - return (c >= TBase && c <= '\u11f9'); - } - - /** - * returns the maximum number of bytes needed to convert a char - */ - public int getMaxBytesPerChar() { - return 2; - } - - - /** - * Return the character set ID - */ + // Return the character set id public String getCharacterEncoding() { - return "Cp970"; - } - - /** - * private function to add the bytes to the output buffer - */ - private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int spaceNeeded; - - // ensure sufficient space for the bytes(s) - - if (theBytes[0] == 0x00) - spaceNeeded = 1; - else - spaceNeeded = 2; - - if (byteOff + spaceNeeded > outEnd) - throw new ConversionBufferFullException(); - - // move the data into the buffer - - if (spaceNeeded == 1) - output[byteOff++] = theBytes[1]; - else { - output[byteOff++] = theBytes[0]; - output[byteOff++] = theBytes[1]; - } - + return "Cp970"; } - /** - * private function to add a unicode character to the output buffer - */ - private void unicodeToBuffer(char unicode, byte[] output, int outEnd) - throws 
ConversionBufferFullException, - UnknownCharacterException { - - int index; - int theBytes; - - // first we convert the unicode to its byte representation - - index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); - if (index < 15000) { - theBytes = (int)(index2.charAt(index)); - } else { - theBytes = (int)(index2a.charAt(index-15000)); - } - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - - // if the unicode was not mappable - look for the substitution bytes - - if (outputByte[0] == 0x00 && outputByte[1] == 0x00 - && unicode != '\u0000') { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - } else { - badInputLength = 1; - throw new UnknownCharacterException(); - } - } - - // now put the bytes in the buffer - - bytesToBuffer(outputByte, output, outEnd); - + public CharToByteCp970() { + super((DoubleByte.Encoder)new IBM970().newEncoder()); } - } diff --git a/src/share/classes/sun/io/CharToByteDBCS_ASCII.java b/src/share/classes/sun/io/CharToByteDBCS_ASCII.java index e58267bb3..f1c9f38ff 100644 --- a/src/share/classes/sun/io/CharToByteDBCS_ASCII.java +++ b/src/share/classes/sun/io/CharToByteDBCS_ASCII.java @@ -24,18 +24,26 @@ */ package sun.io; +import sun.nio.cs.Surrogate; +import sun.nio.cs.ext.DoubleByte; +import static sun.nio.cs.CharsetMapping.*; + public abstract class CharToByteDBCS_ASCII extends CharToByteConverter { private char highHalfZoneCode; private byte[] outputByte = new byte[2]; - protected short index1[]; - protected String index2; - protected String index2a; - protected int mask1; - protected int mask2; - protected int shift; + private DoubleByte.Encoder enc; + + public CharToByteDBCS_ASCII(DoubleByte.Encoder enc) { + super(); + this.enc = enc; + } + + int encodeChar(char c) { + return enc.encodeChar(c); + } /** * flush out any residual data and reset the 
buffer state @@ -69,112 +77,97 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter charOff = inOff; while(charOff < inEnd) { - - int index; - int theBytes; - int spaceNeeded; - - if (highHalfZoneCode == 0) { - inputChar = input[charOff]; - inputSize = 1; - } else { - inputChar = highHalfZoneCode; - inputSize = 0; - highHalfZoneCode = 0; - } - - - // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { - // Is this the last character of the input? - if (charOff + inputSize >= inEnd) { - highHalfZoneCode = inputChar; - charOff += inputSize; - break; - } - - // Is there a low surrogate following? - inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - - // We have a valid surrogate pair. Too bad we don't do - // surrogates. Is substitution enabled? - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } - else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; + int index; + int theBytes; + int spaceNeeded; + + if (highHalfZoneCode == 0) { + inputChar = input[charOff]; + inputSize = 1; + } else { + inputChar = highHalfZoneCode; + inputSize = 0; + highHalfZoneCode = 0; + } + + // Is this a high surrogate? + if (Surrogate.isHigh(inputChar)) { + // Is this the last character of the input? + if (charOff + inputSize >= inEnd) { + highHalfZoneCode = inputChar; + charOff += inputSize; + break; + } + + // Is there a low surrogate following? + inputChar = input[charOff + inputSize]; + if (Surrogate.isLow(inputChar)) { + // We have a valid surrogate pair. Too bad we don't do + // surrogates. Is substitution enabled? 
+ if (subMode) { + if (subBytes.length == 1) { + outputByte[0] = 0x00; + outputByte[1] = subBytes[0]; + } + else { + outputByte[0] = subBytes[0]; + outputByte[1] = subBytes[1]; + } + inputSize++; + } else { + badInputLength = 2; + throw new UnknownCharacterException(); } - - inputSize++; - } else { - badInputLength = 2; - throw new UnknownCharacterException(); - } - } else { - - // We have a malformed surrogate pair - badInputLength = 1; - throw new MalformedInputException(); - } - } - - // Is this an unaccompanied low surrogate? - else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - - // We have a valid character, get the bytes for it - index = index1[((inputChar & mask1) >> shift)] + (inputChar & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - } - - // if there was no mapping - look for substitution characters - if (outputByte[0] == 0x00 && outputByte[1] == 0x00 - && inputChar != '\u0000') - { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; + // We have a malformed surrogate pair + badInputLength = 1; + throw new MalformedInputException(); } - } else { + } + // Is this an unaccompanied low surrogate? 
+ else if (Surrogate.isLow(inputChar)) { badInputLength = 1; - throw new UnknownCharacterException(); - } - } - - if (outputByte[0] == 0x00) - spaceNeeded = 1; - else - spaceNeeded = 2; - - if (byteOff + spaceNeeded > outEnd) - throw new ConversionBufferFullException(); - - if (spaceNeeded == 1) - output[byteOff++] = outputByte[1]; - else { - output[byteOff++] = outputByte[0]; - output[byteOff++] = outputByte[1]; - } - - charOff += inputSize; + throw new MalformedInputException(); + } else { + + // We have a valid character, get the bytes for it + theBytes = encodeChar(inputChar); + if (theBytes == UNMAPPABLE_ENCODING) { + // if there was no mapping - look for substitution characters + if (subMode) { + if (subBytes.length == 1) { + outputByte[0] = 0x00; + outputByte[1] = subBytes[0]; + } else { + outputByte[0] = subBytes[0]; + outputByte[1] = subBytes[1]; + } + } else { + badInputLength = 1; + throw new UnknownCharacterException(); + } + } else { + outputByte[0] = (byte)(theBytes >>8); + outputByte[1] = (byte)theBytes; + } + } + if (outputByte[0] == 0x00) + spaceNeeded = 1; + else + spaceNeeded = 2; + + if (byteOff + spaceNeeded > outEnd) + throw new ConversionBufferFullException(); + + if (spaceNeeded == 1) + output[byteOff++] = outputByte[1]; + else { + output[byteOff++] = outputByte[0]; + output[byteOff++] = outputByte[1]; + } + + charOff += inputSize; } - return byteOff - outOff; } @@ -193,28 +186,11 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter return 2; } - /** * Returns true if the given character can be converted to the * target character encoding. 
*/ - public boolean canConvert(char ch) { - int index; - int theBytes; - - index = index1[((ch & mask1) >> shift)] + (ch & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - if (theBytes != 0) - return (true); - - // only return true if input char was unicode null - all others are - // undefined - return( ch == '\u0000'); - + public boolean canConvert(char c) { + return encodeChar(c) != UNMAPPABLE_ENCODING; } - } diff --git a/src/share/classes/sun/io/CharToByteDBCS_EBCDIC.java b/src/share/classes/sun/io/CharToByteDBCS_EBCDIC.java index 59dddef37..a8d958e1d 100644 --- a/src/share/classes/sun/io/CharToByteDBCS_EBCDIC.java +++ b/src/share/classes/sun/io/CharToByteDBCS_EBCDIC.java @@ -24,9 +24,12 @@ */ package sun.io; +import sun.nio.cs.Surrogate; +import sun.nio.cs.ext.DoubleByte; +import static sun.nio.cs.CharsetMapping.*; + public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter { - private static final int SBCS = 0; private static final int DBCS = 1; @@ -37,18 +40,17 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter private char highHalfZoneCode; private byte[] outputByte = new byte[2]; - protected short index1[]; - protected String index2; - protected String index2a; - protected int mask1; - protected int mask2; - protected int shift; + private DoubleByte.Encoder enc; - - public CharToByteDBCS_EBCDIC() { + public CharToByteDBCS_EBCDIC(DoubleByte.Encoder enc) { super(); highHalfZoneCode = 0; currentState = SBCS; + this.enc = enc; + } + + int encodeChar(char c) { + return enc.encodeChar(c); } /** @@ -106,7 +108,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter } // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { + if (Surrogate.isHigh(inputChar)) { // Is this the last character of the input? 
if (charOff + inputSize >= inEnd) { highHalfZoneCode = inputChar; @@ -116,8 +118,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter // Is there a low surrogate following? inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - + if (Surrogate.isLow(inputChar)) { // We have a valid surrogate pair. Too bad we don't do // surrogates. Is substitution enabled? if (subMode) { @@ -129,58 +130,45 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter outputByte[0] = subBytes[0]; outputByte[1] = subBytes[1]; } - inputSize++; } else { badInputLength = 2; throw new UnknownCharacterException(); } } else { - // We have a malformed surrogate pair badInputLength = 1; throw new MalformedInputException(); } } - // Is this an unaccompanied low surrogate? - else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - - // We have a valid character, get the bytes for it - index = index1[((inputChar & mask1) >> shift)] + (inputChar & mask2); -//System.out.println("Index for U+" + Integer.toHexString(inputChar) + " = " + index); // for debugging purposes - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - } - + else if (Surrogate.isLow(inputChar)) { + badInputLength = 1; + throw new MalformedInputException(); + } else { - // if there was no mapping - look for substitution characters - if (outputByte[0] == 0x00 && outputByte[1] == 0x00 - && inputChar != '\u0000') - { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - } else { - badInputLength = 1; - throw new UnknownCharacterException(); - } + // We have a valid character, get the 
bytes for it + theBytes = encodeChar(inputChar); + if (theBytes == UNMAPPABLE_ENCODING) { + // if there was no mapping - look for substitution characters + if (subMode) { + if (subBytes.length == 1) { + outputByte[0] = 0x00; + outputByte[1] = subBytes[0]; + } else { + outputByte[0] = subBytes[0]; + outputByte[1] = subBytes[1]; + } + } else { + badInputLength = 1; + throw new UnknownCharacterException(); + } + } else { + outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); + outputByte[1] = (byte)(theBytes & 0x000000ff); + } } - //Set the output buffer into the correct state if (currentState == DBCS && outputByte[0] == 0x00) { @@ -215,7 +203,6 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter charOff += inputSize; } - return byteOff - outOff; } @@ -257,28 +244,11 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter } - /** * Returns true if the given character can be converted to the * target character encoding. */ - public boolean canConvert(char ch) { - int index; - int theBytes; - - index = index1[((ch & mask1) >> shift)] + (ch & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - if (theBytes != 0) - return (true); - - // only return true if input char was unicode null - all others are - // undefined - return( ch == '\u0000'); - + public boolean canConvert(char c) { + return encodeChar(c) != UNMAPPABLE_ENCODING; } - } |