aboutsummaryrefslogtreecommitdiff
path: root/libcpp
diff options
context:
space:
mode:
authorjason <jason@138bc75d-0d04-0410-961f-82ee72b054a4>2008-04-18 13:58:08 +0000
committerjason <jason@138bc75d-0d04-0410-961f-82ee72b054a4>2008-04-18 13:58:08 +0000
commit924bbf0237ffce7bd66e35cba2afbbe96bea0202 (patch)
tree7a2e1b1d5ba3460de2699e7cd1bc2b1739fc3119 /libcpp
parent83a50aef528bed2048382a81f25ae05a71378794 (diff)
libcpp/ChangeLog:
2008-04-14 Kris Van Hees <kris.van.hees@oracle.com> * include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal. * include/cpplib.h (CHAR16, CHAR32, STRING16, STRING32): New tokens. (struct cpp_options): Added uliterals. (cpp_interpret_string): Update prototype. (cpp_interpret_string_notranslate): Idem. * charset.c (init_iconv_desc): New width member in cset_converter. (cpp_init_iconv): Add support for char{16,32}_cset_desc. (convert_ucn): Idem. (emit_numeric_escape): Idem. (convert_hex): Idem. (convert_oct): Idem. (convert_escape): Idem. (converter_for_type): New function. (cpp_interpret_string): Use converter_for_type, support u and U prefix. (cpp_interpret_string_notranslate): Match changed prototype. (wide_str_to_charconst): Use converter_for_type. (cpp_interpret_charconst): Add support for CPP_CHAR{16,32}. * directives.c (linemarker_dir): Macro U changed to UC. (parse_include): Idem. (register_pragma_1): Idem. (restore_registered_pragmas): Idem. (get__Pragma_string): Support CPP_STRING{16,32}. * expr.c (eval_token): Support CPP_CHAR{16,32}. * init.c (struct lang_flags): Added uliterals. (lang_defaults): Idem. * internal.h (struct cset_converter) <width>: New field. (struct cpp_reader) <char16_cset_desc>: Idem. (struct cpp_reader) <char32_cset_desc>: Idem. * lex.c (digraph_spellings): Macro U changed to UC. (OP, TK): Idem. (lex_string): Add support for u'...', U'...', u"..." and U"...". (_cpp_lex_direct): Idem. * macro.c (_cpp_builtin_macro_text): Macro U changed to UC. (stringify_arg): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. gcc/ChangeLog: 2008-04-14 Kris Van Hees <kris.van.hees@oracle.com> * c-common.c (CHAR16_TYPE, CHAR32_TYPE): New macros. (fname_as_string): Match updated cpp_interpret_string prototype. (fix_string_type): Support char16_t* and char32_t*. (c_common_nodes_and_builtins): Add char16_t and char32_t (and derivative) nodes. Register as builtin if C++0x. (c_parse_error): Support CPP_CHAR{16,32}.
* c-common.h (RID_CHAR16, RID_CHAR32): New elements. (enum c_tree_index) <CTI_CHAR16_TYPE, CTI_SIGNED_CHAR16_TYPE, CTI_UNSIGNED_CHAR16_TYPE, CTI_CHAR32_TYPE, CTI_SIGNED_CHAR32_TYPE, CTI_UNSIGNED_CHAR32_TYPE, CTI_CHAR16_ARRAY_TYPE, CTI_CHAR32_ARRAY_TYPE>: New elements. (char16_type_node, signed_char16_type_node, unsigned_char16_type_node, char32_type_node, signed_char32_type_node, char16_array_type_node, char32_array_type_node): New defines. * c-lex.c (cb_ident): Match updated cpp_interpret_string prototype. (c_lex_with_flags): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. (lex_string): Support CPP_STRING{16,32}, match updated cpp_interpret_string and cpp_interpret_string_notranslate prototypes. (lex_charconst): Support CPP_CHAR{16,32}. * c-parser.c (c_parser_postfix_expression): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. gcc/cp/ChangeLog: 2008-04-14 Kris Van Hees <kris.van.hees@oracle.com> * cvt.c (type_promotes_to): Support char16_t and char32_t. * decl.c (grokdeclarator): Disallow signed/unsigned/short/long on char16_t and char32_t. * lex.c (reswords): Add char16_t and char32_t (for c++0x). * mangle.c (write_builtin_type): Mangle char16_t/char32_t as vendor extended builtin type u8char32_t. * parser.c (cp_lexer_next_token_is_decl_specifier_keyword): Support RID_CHAR{16,32}. (cp_lexer_print_token): Support CPP_STRING{16,32}. (cp_parser_is_string_literal): Idem. (cp_parser_string_literal): Idem. (cp_parser_primary_expression): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. (cp_parser_simple_type_specifier): Support RID_CHAR{16,32}. * tree.c (char_type_p): Support char16_t and char32_t as char types. * typeck.c (string_conv_p): Support char16_t and char32_t. gcc/testsuite/ChangeLog: 2008-04-14 Kris Van Hees <kris.van.hees@oracle.com> Tests for char16_t and char32_t support. 
* g++.dg/ext/utf-cvt.C: New * g++.dg/ext/utf-cxx0x.C: New * g++.dg/ext/utf-cxx98.C: New * g++.dg/ext/utf-dflt.C: New * g++.dg/ext/utf-gnuxx0x.C: New * g++.dg/ext/utf-gnuxx98.C: New * g++.dg/ext/utf-mangle.C: New * g++.dg/ext/utf-typedef-cxx0x.C: New * g++.dg/ext/utf-typedef- * g++.dg/ext/utf-typespec.C: New * g++.dg/ext/utf16-1.C: New * g++.dg/ext/utf16-2.C: New * g++.dg/ext/utf16-3.C: New * g++.dg/ext/utf16-4.C: New * g++.dg/ext/utf32-1.C: New * g++.dg/ext/utf32-2.C: New * g++.dg/ext/utf32-3.C: New * g++.dg/ext/utf32-4.C: New * gcc.dg/utf-cvt.c: New * gcc.dg/utf-dflt.c: New * gcc.dg/utf16-1.c: New * gcc.dg/utf16-2.c: New * gcc.dg/utf16-3.c: New * gcc.dg/utf16-4.c: New * gcc.dg/utf32-1.c: New * gcc.dg/utf32-2.c: New * gcc.dg/utf32-3.c: New * gcc.dg/utf32-4.c: New libiberty/ChangeLog: 2008-04-14 Kris Van Hees <kris.van.hees@oracle.com> * testsuite/demangle-expected: Added tests for char16_t and char32_t. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@134438 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp')
-rw-r--r--libcpp/ChangeLog37
-rw-r--r--libcpp/charset.c115
-rw-r--r--libcpp/directives.c13
-rw-r--r--libcpp/expr.c4
-rw-r--r--libcpp/include/cpp-id-data.h2
-rw-r--r--libcpp/include/cpplib.h11
-rw-r--r--libcpp/init.c24
-rw-r--r--libcpp/internal.h9
-rw-r--r--libcpp/lex.c37
-rw-r--r--libcpp/macro.c12
10 files changed, 187 insertions, 77 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 0bd2aad74c5..9eef6efb3e9 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,40 @@
+2008-04-18 Kris Van Hees <kris.van.hees@oracle.com>
+
+ * include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal.
+ * include/cpplib.h (CHAR16, CHAR32, STRING16, STRING32): New tokens.
+ (struct cpp_options): Added uliterals.
+ (cpp_interpret_string): Update prototype.
+ (cpp_interpret_string_notranslate): Idem.
+ * charset.c (init_iconv_desc): New width member in cset_converter.
+ (cpp_init_iconv): Add support for char{16,32}_cset_desc.
+ (convert_ucn): Idem.
+ (emit_numeric_escape): Idem.
+ (convert_hex): Idem.
+ (convert_oct): Idem.
+ (convert_escape): Idem.
+ (converter_for_type): New function.
+ (cpp_interpret_string): Use converter_for_type, support u and U prefix.
+ (cpp_interpret_string_notranslate): Match changed prototype.
+ (wide_str_to_charconst): Use converter_for_type.
+ (cpp_interpret_charconst): Add support for CPP_CHAR{16,32}.
+ * directives.c (linemarker_dir): Macro U changed to UC.
+ (parse_include): Idem.
+ (register_pragma_1): Idem.
+ (restore_registered_pragmas): Idem.
+ (get__Pragma_string): Support CPP_STRING{16,32}.
+ * expr.c (eval_token): Support CPP_CHAR{16,32}.
+ * init.c (struct lang_flags): Added uliterals.
+ (lang_defaults): Idem.
+ * internal.h (struct cset_converter) <width>: New field.
+ (struct cpp_reader) <char16_cset_desc>: Idem.
+ (struct cpp_reader) <char32_cset_desc>: Idem.
+ * lex.c (digraph_spellings): Macro U changed to UC.
+ (OP, TK): Idem.
+ (lex_string): Add support for u'...', U'...', u"..." and U"...".
+ (_cpp_lex_direct): Idem.
+ * macro.c (_cpp_builtin_macro_text): Macro U changed to UC.
+ (stringify_arg): Support CPP_CHAR{16,32} and CPP_STRING{16,32}.
+
2008-04-18 Paolo Bonzini <bonzini@gnu.org>
PR bootstrap/35457
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 5db8fc13430..225cdb4915e 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -642,6 +642,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
{
ret.func = convert_no_conversion;
ret.cd = (iconv_t) -1;
+ ret.width = -1;
return ret;
}
@@ -655,6 +656,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
{
ret.func = conversion_tab[i].func;
ret.cd = conversion_tab[i].fake_cd;
+ ret.width = -1;
return ret;
}
@@ -663,6 +665,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
{
ret.func = convert_using_iconv;
ret.cd = iconv_open (to, from);
+ ret.width = -1;
if (ret.cd == (iconv_t) -1)
{
@@ -683,6 +686,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
from, to);
ret.func = convert_no_conversion;
ret.cd = (iconv_t) -1;
+ ret.width = -1;
}
return ret;
}
@@ -716,7 +720,17 @@ cpp_init_iconv (cpp_reader *pfile)
wcset = default_wcset;
pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
+ pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
+ pfile->char16_cset_desc = init_iconv_desc (pfile,
+ be ? "UTF-16BE" : "UTF-16LE",
+ SOURCE_CHARSET);
+ pfile->char16_cset_desc.width = 16;
+ pfile->char32_cset_desc = init_iconv_desc (pfile,
+ be ? "UTF-32BE" : "UTF-32LE",
+ SOURCE_CHARSET);
+ pfile->char32_cset_desc.width = 32;
pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, SOURCE_CHARSET);
+ pfile->wide_cset_desc.width = CPP_OPTION (pfile, wchar_precision);
}
/* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary. */
@@ -1051,15 +1065,13 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
An advanced pointer is returned. Issues all relevant diagnostics. */
static const uchar *
convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, bool wide)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt)
{
cppchar_t ucn;
uchar buf[6];
uchar *bufp = buf;
size_t bytesleft = 6;
int rval;
- struct cset_converter cvt
- = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
from++; /* Skip u/U. */
@@ -1086,14 +1098,15 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
function issues no diagnostics and never fails. */
static void
emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
- struct _cpp_strbuf *tbuf, bool wide)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt)
{
- if (wide)
+ size_t width = cvt.width;
+
+ if (width != CPP_OPTION (pfile, char_precision))
{
/* We have to render this into the target byte order, which may not
be our byte order. */
bool bigend = CPP_OPTION (pfile, bytes_big_endian);
- size_t width = CPP_OPTION (pfile, wchar_precision);
size_t cwidth = CPP_OPTION (pfile, char_precision);
size_t cmask = width_to_mask (cwidth);
size_t nbwc = width / cwidth;
@@ -1136,12 +1149,11 @@ emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
number. You can, e.g. generate surrogate pairs this way. */
static const uchar *
convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, bool wide)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt)
{
cppchar_t c, n = 0, overflow = 0;
int digits_found = 0;
- size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
- : CPP_OPTION (pfile, char_precision));
+ size_t width = cvt.width;
size_t mask = width_to_mask (width);
if (CPP_WTRADITIONAL (pfile))
@@ -1174,7 +1186,7 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
n &= mask;
}
- emit_numeric_escape (pfile, n, tbuf, wide);
+ emit_numeric_escape (pfile, n, tbuf, cvt);
return from;
}
@@ -1187,12 +1199,11 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
number. */
static const uchar *
convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, bool wide)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt)
{
size_t count = 0;
cppchar_t c, n = 0;
- size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
- : CPP_OPTION (pfile, char_precision));
+ size_t width = cvt.width;
size_t mask = width_to_mask (width);
bool overflow = false;
@@ -1213,7 +1224,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
n &= mask;
}
- emit_numeric_escape (pfile, n, tbuf, wide);
+ emit_numeric_escape (pfile, n, tbuf, cvt);
return from;
}
@@ -1224,7 +1235,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
pointer. Handles all relevant diagnostics. */
static const uchar *
convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
- struct _cpp_strbuf *tbuf, bool wide)
+ struct _cpp_strbuf *tbuf, struct cset_converter cvt)
{
/* Values of \a \b \e \f \n \r \t \v respectively. */
#if HOST_CHARSET == HOST_CHARSET_ASCII
@@ -1236,23 +1247,21 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
#endif
uchar c;
- struct cset_converter cvt
- = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
c = *from;
switch (c)
{
/* UCNs, hex escapes, and octal escapes are processed separately. */
case 'u': case 'U':
- return convert_ucn (pfile, from, limit, tbuf, wide);
+ return convert_ucn (pfile, from, limit, tbuf, cvt);
case 'x':
- return convert_hex (pfile, from, limit, tbuf, wide);
+ return convert_hex (pfile, from, limit, tbuf, cvt);
break;
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
- return convert_oct (pfile, from, limit, tbuf, wide);
+ return convert_oct (pfile, from, limit, tbuf, cvt);
/* Various letter escapes. Get the appropriate host-charset
value into C. */
@@ -1312,6 +1321,27 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
return from + 1;
}
+/* TYPE is a token type. The return value is the conversion needed to
+ convert from source to execution character set for the given type. */
+static struct cset_converter
+converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
+{
+ switch (type)
+ {
+ default:
+ return pfile->narrow_cset_desc;
+ case CPP_CHAR16:
+ case CPP_STRING16:
+ return pfile->char16_cset_desc;
+ case CPP_CHAR32:
+ case CPP_STRING32:
+ return pfile->char32_cset_desc;
+ case CPP_WCHAR:
+ case CPP_WSTRING:
+ return pfile->wide_cset_desc;
+ }
+}
+
/* FROM is an array of cpp_string structures of length COUNT. These
are to be converted from the source to the execution character set,
escape sequences translated, and finally all are to be
@@ -1320,13 +1350,12 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
false for failure. */
bool
cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
- cpp_string *to, bool wide)
+ cpp_string *to, enum cpp_ttype type)
{
struct _cpp_strbuf tbuf;
const uchar *p, *base, *limit;
size_t i;
- struct cset_converter cvt
- = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
+ struct cset_converter cvt = converter_for_type (pfile, type);
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
tbuf.text = XNEWVEC (uchar, tbuf.asize);
@@ -1335,7 +1364,7 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
for (i = 0; i < count; i++)
{
p = from[i].text;
- if (*p == 'L') p++;
+ if (*p == 'L' || *p == 'u' || *p == 'U') p++;
p++; /* Skip leading quote. */
limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
@@ -1354,12 +1383,12 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
if (p == limit)
break;
- p = convert_escape (pfile, p + 1, limit, &tbuf, wide);
+ p = convert_escape (pfile, p + 1, limit, &tbuf, cvt);
}
}
/* NUL-terminate the 'to' buffer and translate it to a cpp_string
structure. */
- emit_numeric_escape (pfile, 0, &tbuf, wide);
+ emit_numeric_escape (pfile, 0, &tbuf, cvt);
tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
to->text = tbuf.text;
to->len = tbuf.len;
@@ -1375,7 +1404,8 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
in a string, but do not perform character set conversion. */
bool
cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
- size_t count, cpp_string *to, bool wide)
+ size_t count, cpp_string *to,
+ enum cpp_ttype type ATTRIBUTE_UNUSED)
{
struct cset_converter save_narrow_cset_desc = pfile->narrow_cset_desc;
bool retval;
@@ -1383,7 +1413,7 @@ cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
pfile->narrow_cset_desc.func = convert_no_conversion;
pfile->narrow_cset_desc.cd = (iconv_t) -1;
- retval = cpp_interpret_string (pfile, from, count, to, wide);
+ retval = cpp_interpret_string (pfile, from, count, to, CPP_STRING);
pfile->narrow_cset_desc = save_narrow_cset_desc;
return retval;
@@ -1462,13 +1492,14 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
/* Subroutine of cpp_interpret_charconst which performs the conversion
to a number, for wide strings. STR is the string structure returned
by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
- cpp_interpret_charconst. */
+ cpp_interpret_charconst. TYPE is the token type. */
static cppchar_t
wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
- unsigned int *pchars_seen, int *unsignedp)
+ unsigned int *pchars_seen, int *unsignedp,
+ enum cpp_ttype type)
{
bool bigend = CPP_OPTION (pfile, bytes_big_endian);
- size_t width = CPP_OPTION (pfile, wchar_precision);
+ size_t width = converter_for_type (pfile, type).width;
size_t cwidth = CPP_OPTION (pfile, char_precision);
size_t mask = width_to_mask (width);
size_t cmask = width_to_mask (cwidth);
@@ -1490,7 +1521,7 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
/* Wide character constants have type wchar_t, and a single
character exactly fills a wchar_t, so a multi-character wide
character constant is guaranteed to overflow. */
- if (off > 0)
+ if (str.len > nbwc * 2)
cpp_error (pfile, CPP_DL_WARNING,
"character constant too long for its type");
@@ -1498,13 +1529,20 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
sign- or zero-extend to the full width of cppchar_t. */
if (width < BITS_PER_CPPCHAR_T)
{
- if (CPP_OPTION (pfile, unsigned_wchar) || !(result & (1 << (width - 1))))
+ if (type == CPP_CHAR16 || type == CPP_CHAR32
+ || CPP_OPTION (pfile, unsigned_wchar)
+ || !(result & (1 << (width - 1))))
result &= mask;
else
result |= ~mask;
}
- *unsignedp = CPP_OPTION (pfile, unsigned_wchar);
+ if (type == CPP_CHAR16 || type == CPP_CHAR32
+ || CPP_OPTION (pfile, unsigned_wchar))
+ *unsignedp = 1;
+ else
+ *unsignedp = 0;
+
*pchars_seen = 1;
return result;
}
@@ -1518,20 +1556,21 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
unsigned int *pchars_seen, int *unsignedp)
{
cpp_string str = { 0, 0 };
- bool wide = (token->type == CPP_WCHAR);
+ bool wide = (token->type != CPP_CHAR);
cppchar_t result;
- /* an empty constant will appear as L'' or '' */
+ /* an empty constant will appear as L'', u'', U'' or '' */
if (token->val.str.len == (size_t) (2 + wide))
{
cpp_error (pfile, CPP_DL_ERROR, "empty character constant");
return 0;
}
- else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, wide))
+ else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, token->type))
return 0;
if (wide)
- result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp);
+ result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp,
+ token->type);
else
result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp);
diff --git a/libcpp/directives.c b/libcpp/directives.c
index 0ca1117c19a..3478cd5047a 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -188,7 +188,7 @@ DIRECTIVE_TABLE
did use this notation in its preprocessed output. */
static const directive linemarker_dir =
{
- do_linemarker, U"#", 1, KANDR, IN_I
+ do_linemarker, UC"#", 1, KANDR, IN_I
};
#define SEEN_EOL() (pfile->cur_token[-1].type == CPP_EOF)
@@ -697,7 +697,7 @@ parse_include (cpp_reader *pfile, int *pangle_brackets,
const unsigned char *dir;
if (pfile->directive == &dtable[T_PRAGMA])
- dir = U"pragma dependency";
+ dir = UC"pragma dependency";
else
dir = pfile->directive->name;
cpp_error (pfile, CPP_DL_ERROR, "#%s expects \"FILENAME\" or <FILENAME>",
@@ -1085,7 +1085,7 @@ register_pragma_1 (cpp_reader *pfile, const char *space, const char *name,
if (space)
{
- node = cpp_lookup (pfile, U space, strlen (space));
+ node = cpp_lookup (pfile, UC space, strlen (space));
entry = lookup_pragma_entry (*chain, node);
if (!entry)
{
@@ -1114,7 +1114,7 @@ register_pragma_1 (cpp_reader *pfile, const char *space, const char *name,
}
/* Check for duplicates. */
- node = cpp_lookup (pfile, U name, strlen (name));
+ node = cpp_lookup (pfile, UC name, strlen (name));
entry = lookup_pragma_entry (*chain, node);
if (entry == NULL)
{
@@ -1262,7 +1262,7 @@ restore_registered_pragmas (cpp_reader *pfile, struct pragma_entry *pe,
{
if (pe->is_nspace)
sd = restore_registered_pragmas (pfile, pe->u.space, sd);
- pe->pragma = cpp_lookup (pfile, U *sd, strlen (*sd));
+ pe->pragma = cpp_lookup (pfile, UC *sd, strlen (*sd));
free (*sd);
sd++;
}
@@ -1491,7 +1491,8 @@ get__Pragma_string (cpp_reader *pfile)
string = get_token_no_padding (pfile);
if (string->type == CPP_EOF)
_cpp_backup_tokens (pfile, 1);
- if (string->type != CPP_STRING && string->type != CPP_WSTRING)
+ if (string->type != CPP_STRING && string->type != CPP_WSTRING
+ && string->type != CPP_STRING32 && string->type != CPP_STRING16)
return NULL;
paren = get_token_no_padding (pfile);
diff --git a/libcpp/expr.c b/libcpp/expr.c
index 9e89dd9574a..00149b2422d 100644
--- a/libcpp/expr.c
+++ b/libcpp/expr.c
@@ -705,6 +705,8 @@ eval_token (cpp_reader *pfile, const cpp_token *token)
case CPP_WCHAR:
case CPP_CHAR:
+ case CPP_CHAR16:
+ case CPP_CHAR32:
{
cppchar_t cc = cpp_interpret_charconst (pfile, token,
&temp, &unsignedp);
@@ -863,6 +865,8 @@ _cpp_parse_expr (cpp_reader *pfile)
case CPP_NUMBER:
case CPP_CHAR:
case CPP_WCHAR:
+ case CPP_CHAR16:
+ case CPP_CHAR32:
case CPP_NAME:
case CPP_HASH:
if (!want_value)
diff --git a/libcpp/include/cpp-id-data.h b/libcpp/include/cpp-id-data.h
index 2445186c228..db37c2beccc 100644
--- a/libcpp/include/cpp-id-data.h
+++ b/libcpp/include/cpp-id-data.h
@@ -22,7 +22,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
typedef unsigned char uchar;
#endif
-#define U (const unsigned char *) /* Intended use: U"string" */
+#define UC (const unsigned char *) /* Intended use: UC"string" */
/* Chained list of answers to an assertion. */
struct answer GTY(())
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 84de0e09975..483c54331fb 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -123,10 +123,14 @@ struct _cpp_file;
\
TK(CHAR, LITERAL) /* 'char' */ \
TK(WCHAR, LITERAL) /* L'char' */ \
+ TK(CHAR16, LITERAL) /* u'char' */ \
+ TK(CHAR32, LITERAL) /* U'char' */ \
TK(OTHER, LITERAL) /* stray punctuation */ \
\
TK(STRING, LITERAL) /* "string" */ \
TK(WSTRING, LITERAL) /* L"string" */ \
+ TK(STRING16, LITERAL) /* u"string" */ \
+ TK(STRING32, LITERAL) /* U"string" */ \
TK(OBJC_STRING, LITERAL) /* @"string" - Objective-C */ \
TK(HEADER_NAME, LITERAL) /* <stdio.h> in #include */ \
\
@@ -291,6 +295,9 @@ struct cpp_options
/* Nonzero means to allow hexadecimal floats and LL suffixes. */
unsigned char extended_numbers;
+ /* Nonzero means process u/U prefix literals (UTF-16/32). */
+ unsigned char uliterals;
+
/* Nonzero means print names of header files (-H). */
unsigned char print_include_names;
@@ -712,10 +719,10 @@ extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
/* Evaluate a vector of CPP_STRING or CPP_WSTRING tokens. */
extern bool cpp_interpret_string (cpp_reader *,
const cpp_string *, size_t,
- cpp_string *, bool);
+ cpp_string *, enum cpp_ttype);
extern bool cpp_interpret_string_notranslate (cpp_reader *,
const cpp_string *, size_t,
- cpp_string *, bool);
+ cpp_string *, enum cpp_ttype);
/* Convert a host character constant to the execution character set. */
extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
diff --git a/libcpp/init.c b/libcpp/init.c
index aa0c0b10e3d..040bf2a0489 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -76,20 +76,21 @@ struct lang_flags
char std;
char cplusplus_comments;
char digraphs;
+ char uliterals;
};
static const struct lang_flags lang_defaults[] =
-{ /* c99 c++ xnum xid std // digr */
- /* GNUC89 */ { 0, 0, 1, 0, 0, 1, 1 },
- /* GNUC99 */ { 1, 0, 1, 0, 0, 1, 1 },
- /* STDC89 */ { 0, 0, 0, 0, 1, 0, 0 },
- /* STDC94 */ { 0, 0, 0, 0, 1, 0, 1 },
- /* STDC99 */ { 1, 0, 1, 0, 1, 1, 1 },
- /* GNUCXX */ { 0, 1, 1, 0, 0, 1, 1 },
- /* CXX98 */ { 0, 1, 1, 0, 1, 1, 1 },
- /* GNUCXX0X */ { 1, 1, 1, 0, 0, 1, 1 },
- /* CXX0X */ { 1, 1, 1, 0, 1, 1, 1 },
- /* ASM */ { 0, 0, 1, 0, 0, 1, 0 }
+{ /* c99 c++ xnum xid std // digr ulit */
+ /* GNUC89 */ { 0, 0, 1, 0, 0, 1, 1, 0 },
+ /* GNUC99 */ { 1, 0, 1, 0, 0, 1, 1, 1 },
+ /* STDC89 */ { 0, 0, 0, 0, 1, 0, 0, 0 },
+ /* STDC94 */ { 0, 0, 0, 0, 1, 0, 1, 0 },
+ /* STDC99 */ { 1, 0, 1, 0, 1, 1, 1, 0 },
+ /* GNUCXX */ { 0, 1, 1, 0, 0, 1, 1, 0 },
+ /* CXX98 */ { 0, 1, 1, 0, 1, 1, 1, 0 },
+ /* GNUCXX0X */ { 1, 1, 1, 0, 0, 1, 1, 1 },
+ /* CXX0X */ { 1, 1, 1, 0, 1, 1, 1, 1 },
+ /* ASM */ { 0, 0, 1, 0, 0, 1, 0, 0 }
/* xid should be 1 for GNUC99, STDC99, GNUCXX, CXX98, GNUCXX0X, and
CXX0X when no longer experimental (when all uses of identifiers
in the compiler have been audited for correct handling of
@@ -112,6 +113,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
CPP_OPTION (pfile, trigraphs) = l->std;
CPP_OPTION (pfile, cplusplus_comments) = l->cplusplus_comments;
CPP_OPTION (pfile, digraphs) = l->digraphs;
+ CPP_OPTION (pfile, uliterals) = l->uliterals;
}
/* Initialize library global state. */
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 6110e5cdb08..bf6c5f8c8d2 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -48,6 +48,7 @@ struct cset_converter
{
convert_f func;
iconv_t cd;
+ int width;
};
#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))
@@ -399,6 +400,14 @@ struct cpp_reader
struct cset_converter narrow_cset_desc;
/* Descriptor for converting from the source character set to the
+ UTF-16 execution character set. */
+ struct cset_converter char16_cset_desc;
+
+ /* Descriptor for converting from the source character set to the
+ UTF-32 execution character set. */
+ struct cset_converter char32_cset_desc;
+
+ /* Descriptor for converting from the source character set to the
wide execution character set. */
struct cset_converter wide_cset_desc;
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 2eaf6105922..772a8701654 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -39,10 +39,10 @@ struct token_spelling
};
static const unsigned char *const digraph_spellings[] =
-{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
+{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
-#define OP(e, s) { SPELL_OPERATOR, U s },
-#define TK(e, s) { SPELL_ ## s, U #e },
+#define OP(e, s) { SPELL_OPERATOR, UC s },
+#define TK(e, s) { SPELL_ ## s, UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK
@@ -611,8 +611,8 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
/* Lexes a string, character constant, or angle-bracketed header file
name. The stored string contains the spelling, including opening
- quote and leading any leading 'L'. It returns the type of the
- literal, or CPP_OTHER if it was not properly terminated.
+ quote and any leading 'L', 'u' or 'U'. It returns the type
+ of the literal, or CPP_OTHER if it was not properly terminated.
The spelling is NUL-terminated, but it is not guaranteed that this
is the first NUL since embedded NULs are preserved. */
@@ -626,12 +626,16 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
cur = base;
terminator = *cur++;
- if (terminator == 'L')
+ if (terminator == 'L' || terminator == 'u' || terminator == 'U')
terminator = *cur++;
if (terminator == '\"')
- type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
+ type = (*base == 'L' ? CPP_WSTRING :
+ *base == 'U' ? CPP_STRING32 :
+ *base == 'u' ? CPP_STRING16 : CPP_STRING);
else if (terminator == '\'')
- type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
+ type = (*base == 'L' ? CPP_WCHAR :
+ *base == 'U' ? CPP_CHAR32 :
+ *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
else
terminator = '>', type = CPP_HEADER_NAME;
@@ -965,11 +969,16 @@ _cpp_lex_direct (cpp_reader *pfile)
}
case 'L':
- /* 'L' may introduce wide characters or strings. */
- if (*buffer->cur == '\'' || *buffer->cur == '"')
+ case 'u':
+ case 'U':
+ /* 'L', 'u' or 'U' may introduce wide characters or strings. */
+ if (c == 'L' || CPP_OPTION (pfile, uliterals))
{
- lex_string (pfile, result, buffer->cur - 1);
- break;
+ if (*buffer->cur == '\'' || *buffer->cur == '"')
+ {
+ lex_string (pfile, result, buffer->cur - 1);
+ break;
+ }
}
/* Fall through. */
@@ -977,12 +986,12 @@ _cpp_lex_direct (cpp_reader *pfile)
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 's': case 't': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'S': case 'T': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;
{
diff --git a/libcpp/macro.c b/libcpp/macro.c
index 587b94814cc..016754bc952 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -158,7 +158,7 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node)
{
cpp_errno (pfile, CPP_DL_WARNING,
"could not determine file timestamp");
- pbuffer->timestamp = U"\"??? ??? ?? ??:??:?? ????\"";
+ pbuffer->timestamp = UC"\"??? ??? ?? ??:??:?? ????\"";
}
}
}
@@ -256,8 +256,8 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node)
cpp_errno (pfile, CPP_DL_WARNING,
"could not determine date and time");
- pfile->date = U"\"??? ?? ????\"";
- pfile->time = U"\"??:??:??\"";
+ pfile->date = UC"\"??? ?? ????\"";
+ pfile->time = UC"\"??:??:??\"";
}
}
@@ -375,8 +375,10 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
continue;
}
- escape_it = (token->type == CPP_STRING || token->type == CPP_WSTRING
- || token->type == CPP_CHAR || token->type == CPP_WCHAR);
+ escape_it = (token->type == CPP_STRING || token->type == CPP_CHAR
+ || token->type == CPP_WSTRING || token->type == CPP_WCHAR
+ || token->type == CPP_STRING32 || token->type == CPP_CHAR32
+ || token->type == CPP_STRING16 || token->type == CPP_CHAR16);
/* Room for each char being written in octal, initial space and
final quote and NUL. */