diff options
-rw-r--r-- | libc/ChangeLog.eglibc | 68 | ||||
-rw-r--r-- | libc/debug/tst-chk1.c | 3 | ||||
-rw-r--r-- | libc/intl/Makefile | 2 | ||||
-rw-r--r-- | libc/libio/Makefile | 22 | ||||
-rw-r--r-- | libc/nptl/Makefile | 4 | ||||
-rw-r--r-- | libc/posix/Makefile | 15 | ||||
-rw-r--r-- | libc/posix/bug-regex6.c | 8 | ||||
-rw-r--r-- | libc/posix/fnmatch.c | 6 | ||||
-rw-r--r-- | libc/posix/fnmatch_loop.c | 17 | ||||
-rw-r--r-- | libc/posix/regcomp.c | 97 | ||||
-rw-r--r-- | libc/posix/regex_internal.c | 45 | ||||
-rw-r--r-- | libc/posix/regex_internal.h | 23 | ||||
-rw-r--r-- | libc/posix/regexec.c | 46 | ||||
-rw-r--r-- | libc/stdio-common/Makefile | 6 | ||||
-rw-r--r-- | libc/stdio-common/_i18n_number.h | 13 | ||||
-rw-r--r-- | libc/stdio-common/vfprintf.c | 18 | ||||
-rw-r--r-- | libc/stdio-common/vfscanf.c | 35 | ||||
-rw-r--r-- | libc/stdlib/Makefile | 6 | ||||
-rw-r--r-- | libc/stdlib/strtod_l.c | 13 | ||||
-rw-r--r-- | libc/stdlib/tst-strtod.c | 5 | ||||
-rw-r--r-- | libc/string/strcoll_l.c | 5 | ||||
-rw-r--r-- | libc/string/strxfrm_l.c | 5 | ||||
-rw-r--r-- | libc/string/tst-strxfrm.c | 3 | ||||
-rw-r--r-- | libc/string/tst-strxfrm2.c | 3 | ||||
-rw-r--r-- | libc/time/Makefile | 4 |
25 files changed, 351 insertions, 121 deletions
diff --git a/libc/ChangeLog.eglibc b/libc/ChangeLog.eglibc index db54a5eb1..a3879ac2e 100644 --- a/libc/ChangeLog.eglibc +++ b/libc/ChangeLog.eglibc @@ -1,5 +1,73 @@ 2007-12-11 Jim Blandy <jimb@codesourcery.com> + Clean up test results with OPTION_EGLIBC_LOCALE_CODE disabled. + * debug/tst-chk1.c: Omit locale tests when group is disabled. + * intl/Makefile (tests): Put tst-translit, tst-gettext2, + tst-codeset, and tst-gettext3 in the group. + * libio/Makefile (tests): Put tst-fgetws, tst-fopenloc, + tst-setvbuf1, tst-ungetwc1, tst-ungetwc2, bug-ftell, bug-ungetwc2, + tst-widetext, and tst-fopenloc.check in the group; some were formerly in + OPTION_POSIX_WIDE_CHAR_DEVICE_IO. + * nptl/Makefile (tests): Put tst-locale1 in the group. + * posix/Makefile (tests): Put bug-regex17, bug-regex18, + bug-regex20, bug-regex23, and bug-regex26 in the group. + (tst-rxspencer): Pass --utf8 only when the group is enabled. + * posix/bug-regex6.c: Omit non-C locale tests when group is disabled. + * stdio-common/Makefile (tests): Put bug14 and scanf13 in the group. + * stdlib/Makefile (tests): Put tst-strtod4, tst-strtod5, and + testmb2 in the group. + * stdlib/tst-strtod.c: Omit locale tests when group is disabled. + * string/tst-strxfrm.c, string/tst-strxfrm2.c: Same. + * time/Makefile (tests): Put tst-ftime_l in the group. + + Fix code broken by OPTION_EGLIBC_LOCALE_CODE. + * posix/regex_internal.h: #include <gnu/option-groups.h>. + (string_mb_cur_max, dfa_mb_cur_max): New macros for accessing the + 'mb_cur_max' fields of re_string_t and re_dfa_t, whose values can + be constant when the group is disabled. Use them throughout. + * posix/regex_internal.c: Use string_mb_cur_max and dfa_mb_cur_max + as appropriate. + * posix/regcomp.c: Same. + (re_compile_fastmap_iter): Process COMPLEX_BRACKET nodes only when + the group is enabled. + (init_dfa): When the group is disabled, clear map_notascii. + (parse_bracket_exp): Process MB_CHAR elements only when the group + is enabled. 
Otherwise, fix 'nrules' at zero, for the compiler's + benefit, and assume the collation sequence is the identity. + (parse_bracket_element): Create MB_CHAR elements only when the + group is enabled. + (build_equiv_class): When the group is disabled, we know there + will be no collation rules. + (build_charclass): When the group is disabled, do not try to + process references to wide character categories accessed via + 'wctype'. + * posix/regexec.c: Use string_mb_cur_max and dfa_mb_cur_max + as appropriate. + (find_collation_sequence_value): Define function only when the + group is enabled. + (check_node_accept_bytes): Check character against 'wctype' style + classes only if group is enabled. When the group is disabled, + skip collation-rule-based matching. + * posix/fnmatch.c: #include <gnu/option-groups.h>. + Define HANDLE_MULTIBYTE only when OPTION_EGLIBC_LOCALE_CODE is + enabled. + * posix/fnmatch_loop.c (FCT (internal_fnmatch or internal_fnwmatch)): + If the group is disabled, assume that the collation sequence is + the identity. + * stdio-common/_i18n_number.h (_i18n_number_rewrite): Provide only + a trivial definition when the group is disabled. + * stdio-common/vfprintf.c: #include <gnu/option-groups.h>. + (LOCALE_SUPPORT): Define. + (vfprintf): Consult it as appropriate. + * stdio-common/vfscanf.c: #include <gnu/option-groups.h>. + (_IO_vfwscanf): If the group is disabled, don't try to consult the + locale for decimal point and thousands separator characters, or + for custom digits. + * stdlib/strtod_l.c (__STRTOF_INTERNAL): Don't try to consult the + locale's numeric settings. + * string/strxfrm_l.c, string/strcoll_l.c: Don't try to consult the + locale's collation settings. + Fix testing with OPTION_EGLIBC_CATGETS disabled. * catgets/Makefile (tests): Put de/libc.cat, test1.cat, test2.cat, and test-gencat.out in the option group. 
diff --git a/libc/debug/tst-chk1.c b/libc/debug/tst-chk1.c index 487b07102..88e05da8b 100644 --- a/libc/debug/tst-chk1.c +++ b/libc/debug/tst-chk1.c @@ -30,6 +30,7 @@ #include <wchar.h> #include <sys/socket.h> #include <sys/un.h> +#include <gnu/option-groups.h> char *temp_filename; static void do_prepare (void); @@ -1173,6 +1174,7 @@ do_test (void) # endif #endif +#if __OPTION_EGLIBC_LOCALE_CODE if (setlocale (LC_ALL, "de_DE.UTF-8") != NULL) { assert (MB_CUR_MAX <= 10); @@ -1329,6 +1331,7 @@ do_test (void) puts ("cannot set locale"); ret = 1; } +#endif fd = posix_openpt (O_RDWR); if (fd != -1) diff --git a/libc/intl/Makefile b/libc/intl/Makefile index 0b895c91e..6aa1e6bea 100644 --- a/libc/intl/Makefile +++ b/libc/intl/Makefile @@ -63,7 +63,7 @@ $(objpfx)plural.o: plural.c include ../Rules # eglibc: ifeq (no,$(cross-compiling)) -ifeq (yyes,$(OPTION_EGLIBC_LOCALES)$(build-shared)) +ifeq (yyyes,$(OPTION_EGLIBC_LOCALES)$(OPTION_EGLIBC_LOCALE_CODE)$(build-shared)) ifneq ($(strip $(MSGFMT)),:) tests: $(objpfx)tst-translit.out $(objpfx)tst-gettext2.out \ $(objpfx)tst-codeset.out $(objpfx)tst-gettext3.out diff --git a/libc/libio/Makefile b/libc/libio/Makefile index e783968fc..1a4ac0bc2 100644 --- a/libc/libio/Makefile +++ b/libc/libio/Makefile @@ -65,12 +65,14 @@ tests = tst_swprintf tst_swscanf \ tst-memstream1 tst-memstream2 \ tst-wmemstream1 tst-wmemstream2 \ bug-memstream1 -tests-$(OPTION_EGLIBC_LOCALE_CODE) \ - += tst-swscanf -tests-$(OPTION_POSIX_WIDE_CHAR_DEVICE_IO) \ - += bug-ftell bug-rewind bug-rewind2 bug-ungetwc1 bug-ungetwc2 \ - bug-wfflush bug-wmemstream1 tst-fgetws tst-fopenloc tst-fopenloc2 \ - tst-setvbuf1 tst-ungetwc1 tst-ungetwc2 tst-widetext tst_getwc \ +tests-$(OPTION_EGLIBC_LOCALE_CODE) \ + += tst-swscanf tst-fgetws tst-fopenloc tst-setvbuf1 \ + tst-ungetwc1 tst-ungetwc2 bug-ftell bug-ungetwc2 \ + tst-widetext +tests-$(OPTION_POSIX_WIDE_CHAR_DEVICE_IO) \ + += bug-rewind bug-rewind2 bug-ungetwc1 \ + bug-wfflush bug-wmemstream1 tst-fopenloc2 \ + 
tst_getwc \ tst_putwc tst_wprintf tst_wprintf2 tst_wscanf test-srcs = test-freopen @@ -186,14 +188,14 @@ distribute := iolibio.h libioP.h strfile.h Banner test-freopen.sh \ include ../Rules ifeq (y,$(OPTION_POSIX_WIDE_CHAR_DEVICE_IO)) -# eglibc: ifeq (no,$(cross-compiling)) -tests: $(objpfx)test-freopen.out $(objpfx)tst-fopenloc.check -# eglibc: endif - +tests: $(objpfx)test-freopen.out $(objpfx)test-freopen.out: test-freopen.sh $(objpfx)test-freopen $(SHELL) -e $< $(common-objpfx) '$(run-program-prefix)' \ $(common-objpfx)libio/ +endif +ifeq (y,$(OPTION_EGLIBC_LOCALE_CODE)) +tests: $(objpfx)tst-fopenloc.check $(objpfx)tst-fopenloc.check: $(objpfx)tst-fopenloc.out cmp ../iconvdata/testdata/ISO-8859-1..UTF8 $(objpfx)tst-fopenloc.out \ > $@ diff --git a/libc/nptl/Makefile b/libc/nptl/Makefile index 546058977..4f8900679 100644 --- a/libc/nptl/Makefile +++ b/libc/nptl/Makefile @@ -251,7 +251,7 @@ tests = tst-typesizes \ tst-unload \ tst-dlsym1 \ tst-sysconf \ - tst-locale1 tst-locale2 \ + tst-locale2 \ tst-umask1 \ tst-popen1 \ tst-clock1 \ @@ -272,6 +272,8 @@ tests-$(OPTION_EGLIBC_BACKTRACE) += tst-backtrace1 # This test is written in C++. tests-$(OPTION_EGLIBC_CXX_TESTS) += tst-cancel24 +tests-$(OPTION_EGLIBC_LOCALE_CODE) += tst-locale1 + # Files which must not be linked with libpthread. 
tests-nolibpthread = tst-unload diff --git a/libc/posix/Makefile b/libc/posix/Makefile index 3f8049bc9..a8e3d2469 100644 --- a/libc/posix/Makefile +++ b/libc/posix/Makefile @@ -86,9 +86,8 @@ tests := tstgetopt testfnm runtests runptests \ tst-gnuglob bug-regex6 bug-regex7 \ bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \ bug-regex13 bug-regex14 bug-regex15 bug-regex16 \ - bug-regex17 bug-regex18 bug-regex20 \ - bug-regex21 bug-regex23 bug-regex24 \ - bug-regex26 bug-regex27 bug-regex28 \ + bug-regex21 bug-regex24 \ + bug-regex27 bug-regex28 \ tst-nice tst-nanosleep tst-regex2 \ transbug tst-rxspencer tst-pcre tst-boost \ tst-vfork1 tst-vfork2 tst-vfork3 tst-waitid \ @@ -98,9 +97,10 @@ tests := tstgetopt testfnm runtests runptests \ tst-execve1 tst-execve2 tst-execle1 tst-execle2 \ tst-execvp3 tst-execvp4 \ tst-fnmatch2 tst-cpucount tst-cpuset -tests-$(OPTION_EGLIBC_LOCALE_CODE) \ +tests-$(OPTION_EGLIBC_LOCALE_CODE) \ += tst-fnmatch tst-regex tst-regexloc bug-regex1 bug-regex5 \ - bug-regex19 bug-regex22 bug-regex25 + bug-regex17 bug-regex18 bug-regex19 bug-regex20 \ + bug-regex22 bug-regex23 bug-regex25 bug-regex26 tests-$(OPTION_EGLIBC_INET) \ += tst-getaddrinfo bug-ga1 tst-getaddrinfo2 \ tst-rfc3484 tst-rfc3484-2 tst-getaddrinfo3 @@ -214,7 +214,10 @@ bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata bug-regex23-ENV = LOCPATH=$(common-objpfx)localedata bug-regex25-ENV = LOCPATH=$(common-objpfx)localedata bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata -tst-rxspencer-ARGS = --utf8 rxspencer/tests +tst-rxspencer-ARGS = rxspencer/tests +ifeq (y,$(OPTION_EGLIBC_LOCALE_CODE)) +tst-rxspencer-ARGS += --utf8 +endif tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata tst-pcre-ARGS = PCRE.tests tst-boost-ARGS = BOOST.tests diff --git a/libc/posix/bug-regex6.c b/libc/posix/bug-regex6.c index 9c3f3750d..6dfa44983 100644 --- a/libc/posix/bug-regex6.c +++ b/libc/posix/bug-regex6.c @@ -23,6 +23,7 @@ #include <string.h> #include <sys/types.h> #include 
<regex.h> +#include <gnu/option-groups.h> int @@ -31,7 +32,12 @@ main (int argc, char *argv[]) regex_t re; regmatch_t mat[10]; int i, j, ret = 0; - const char *locales[] = { "C", "de_DE.UTF-8" }; + const char *locales[] = { + "C", +#if __OPTION_EGLIBC_LOCALE_CODE + "de_DE.UTF-8" +#endif + }; const char *string = "http://www.regex.com/pattern/matching.html#intro"; regmatch_t expect[10] = { { 0, 48 }, { 0, 5 }, { 0, 4 }, { 5, 20 }, { 7, 20 }, { 20, 42 }, diff --git a/libc/posix/fnmatch.c b/libc/posix/fnmatch.c index 4baef9e69..5401dbac5 100644 --- a/libc/posix/fnmatch.c +++ b/libc/posix/fnmatch.c @@ -31,6 +31,10 @@ #include <fnmatch.h> #include <ctype.h> +#if defined _LIBC +# include <gnu/option-groups.h> +#endif + #if HAVE_STRING_H || defined _LIBC # include <string.h> #else @@ -132,7 +136,7 @@ extern int fnmatch (const char *pattern, const char *string, int flags); # define ISWCTYPE(WC, WT) iswctype (WC, WT) # endif -# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC +# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || (_LIBC && __OPTION_EGLIBC_LOCALE_CODE) /* In this case we are implementing the multibyte character handling. 
*/ # define HANDLE_MULTIBYTE 1 # endif diff --git a/libc/posix/fnmatch_loop.c b/libc/posix/fnmatch_loop.c index 67c0ee4ab..5d0e8b5a3 100644 --- a/libc/posix/fnmatch_loop.c +++ b/libc/posix/fnmatch_loop.c @@ -52,10 +52,15 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends) const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); # else +# if __OPTION_EGLIBC_LOCALE_CODE const UCHAR *collseq = (const UCHAR *) _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); -# endif -#endif +# define COLLSEQ_BYTE_LOOKUP(ix) (collseq[(ix)]) +# else +# define COLLSEQ_BYTE_LOOKUP(ix) (ix) +# endif /* __OPTION_EGLIBC_LOCALE_CODE */ +# endif /* WIDE_CHAR_VERSION */ +#endif /* _LIBC */ while ((c = *p++) != L('\0')) { @@ -676,8 +681,10 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends) else lcollseq = __collseq_table_lookup (collseq, cold); # else - fcollseq = collseq[fn]; - lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; + fcollseq = COLLSEQ_BYTE_LOOKUP (fn); + lcollseq = (is_seqval + ? 
cold + : COLLSEQ_BYTE_LOOKUP ((UCHAR) cold)); # endif is_seqval = 0; @@ -853,7 +860,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends) goto matched; } # else - hcollseq = collseq[cend]; + hcollseq = COLLSEQ_BYTE_LOOKUP (cend); # endif } diff --git a/libc/posix/regcomp.c b/libc/posix/regcomp.c index 4cf168821..0fe18ee8d 100644 --- a/libc/posix/regcomp.c +++ b/libc/posix/regcomp.c @@ -304,7 +304,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, { re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; int node_cnt; - int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + int icase = (dfa_mb_cur_max (dfa) == 1 && (bufp->syntax & RE_ICASE)); for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) { int node = init_state->nodes.elems[node_cnt]; @@ -314,9 +314,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, { re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); #ifdef RE_ENABLE_I18N - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + if ((bufp->syntax & RE_ICASE) && dfa_mb_cur_max (dfa) > 1) { - unsigned char *buf = alloca (dfa->mb_cur_max), *p; + unsigned char *buf = alloca (dfa_mb_cur_max (dfa)), *p; wchar_t wc; mbstate_t state; @@ -347,7 +347,11 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, re_set_fastmap (fastmap, icase, ch); } } -#ifdef RE_ENABLE_I18N + + /* When OPTION_EGLIBC_LOCALE_CODE is disabled, the current + locale is always C, which has no rules and no multi-byte + characters. 
*/ +#if defined RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE else if (type == COMPLEX_BRACKET) { int i; @@ -371,7 +375,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, re_set_fastmap (fastmap, icase, i); } # else - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) re_set_fastmap (fastmap, icase, i); @@ -384,7 +388,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, memset (&state, '\0', sizeof (state)); if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) re_set_fastmap (fastmap, icase, *(unsigned char *) buf); - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + if ((bufp->syntax & RE_ICASE) && dfa_mb_cur_max (dfa) > 1) { if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) != (size_t) -1) @@ -392,7 +396,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, } } } -#endif /* RE_ENABLE_I18N */ +#endif /* RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE */ else if (type == OP_PERIOD #ifdef RE_ENABLE_I18N || type == OP_UTF8_PERIOD @@ -835,11 +839,15 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) dfa->mb_cur_max = MB_CUR_MAX; #ifdef _LIBC - if (dfa->mb_cur_max == 6 + if (dfa_mb_cur_max (dfa) == 6 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) dfa->is_utf8 = 1; +# if __OPTION_EGLIBC_LOCALE_CODE dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) != 0); +# else + dfa->map_notascii = 0; +# endif #else # ifdef HAVE_LANGINFO_CODESET codeset_name = nl_langinfo (CODESET); @@ -865,7 +873,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) #endif #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) { if (dfa->is_utf8) dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; @@ -1726,7 +1734,7 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->word_char = 0; #ifdef RE_ENABLE_I18N token->mb_partial = 0; - if (input->mb_cur_max > 1 && + 
if (string_mb_cur_max (input) > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; @@ -1747,7 +1755,7 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->opr.c = c2; token->type = CHARACTER; #ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) + if (string_mb_cur_max (input) > 1) { wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input) + 1); @@ -1861,7 +1869,7 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->type = CHARACTER; #ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) + if (string_mb_cur_max (input) > 1) { wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; @@ -1961,7 +1969,7 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) token->opr.c = c; #ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1 && + if (string_mb_cur_max (input) > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; @@ -2175,7 +2183,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, return NULL; } #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) { while (!re_string_eoi (regexp) && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) @@ -2313,7 +2321,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, *err = REG_ESPACE; return NULL; } - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) dfa->has_mb_node = 1; break; case OP_WORD: @@ -2606,7 +2614,7 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, However, for !_LIBC we have no collation elements: if the character set is single byte, the single byte character set that we build below suffices. parse_bracket_exp passes - no MBCSET if dfa->mb_cur_max == 1. */ + no MBCSET if dfa_mb_cur_max (dfa) == 1. */ if (mbcset) { /* Check the space of the arrays. 
*/ @@ -2702,7 +2710,13 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) { #ifdef _LIBC +#if __OPTION_EGLIBC_LOCALE_CODE const unsigned char *collseqmb; +# define COLLSEQMB_LOOKUP(ix) (collseqmb[(ix)]) +#else +# define COLLSEQMB_LOOKUP(ix) (ix) +#endif + const char *collseqwc; uint32_t nrules; int32_t table_size; @@ -2762,18 +2776,20 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, if (MB_CUR_MAX == 1) */ if (nrules == 0) - return collseqmb[br_elem->opr.ch]; + return COLLSEQMB_LOOKUP (br_elem->opr.ch); else { wint_t wc = __btowc (br_elem->opr.ch); return __collseq_table_lookup (collseqwc, wc); } } +#if __OPTION_EGLIBC_LOCALE_CODE else if (br_elem->type == MB_CHAR) { if (nrules != 0) return __collseq_table_lookup (collseqwc, br_elem->opr.wch); } +#endif else if (br_elem->type == COLL_SYM) { size_t sym_name_len = strlen ((char *) br_elem->opr.name); @@ -2804,11 +2820,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, { /* No valid character. Match it as a single byte character. */ - return collseqmb[br_elem->opr.name[0]]; + return COLLSEQMB_LOOKUP (br_elem->opr.name[0]); } } else if (sym_name_len == 1) - return collseqmb[br_elem->opr.name[0]]; + return COLLSEQMB_LOOKUP (br_elem->opr.name[0]); } return UINT_MAX; } @@ -2851,7 +2867,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, However, if we have no collation elements, and the character set is single byte, the single byte character set that we build below suffices. */ - if (nrules > 0 || dfa->mb_cur_max > 1) + if (nrules > 0 || dfa_mb_cur_max (dfa) > 1) { /* Check the space of the arrays. 
*/ if (BE (*range_alloc == mbcset->nranges, 0)) @@ -2888,7 +2904,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, if (MB_CUR_MAX == 1) */ if (nrules == 0) - ch_collseq = collseqmb[ch]; + ch_collseq = COLLSEQMB_LOOKUP (ch); else ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) @@ -2969,7 +2985,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, re_bitset_ptr_t sbcset; #ifdef RE_ENABLE_I18N re_charset_t *mbcset; - int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int coll_sym_alloc = 0, range_alloc = 0; +#if __OPTION_EGLIBC_LOCALE_CODE + int mbchar_alloc = 0; +#endif int equiv_class_alloc = 0, char_class_alloc = 0; #endif /* not RE_ENABLE_I18N */ int non_match = 0; @@ -2977,9 +2996,15 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, int token_len; int first_round = 1; #ifdef _LIBC +#if __OPTION_EGLIBC_LOCALE_CODE collseqmb = (const unsigned char *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); +#else + /* This is true when OPTION_EGLIBC_LOCALE_CODE is disabled, but the + compiler can't figure that out. */ + nrules = 0; +#endif if (nrules) { /* @@ -3103,7 +3128,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, #else # ifdef RE_ENABLE_I18N *err = build_range_exp (sbcset, - dfa->mb_cur_max > 1 ? mbcset : NULL, + dfa_mb_cur_max (dfa) > 1 ? mbcset : NULL, &range_alloc, &start_elem, &end_elem); # else *err = build_range_exp (sbcset, &start_elem, &end_elem); @@ -3119,7 +3144,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, case SB_CHAR: bitset_set (sbcset, start_elem.opr.ch); break; -#ifdef RE_ENABLE_I18N +#if defined RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE case MB_CHAR: /* Check whether the array has enough space. 
*/ if (BE (mbchar_alloc == mbcset->nmbchars, 0)) @@ -3137,7 +3162,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, } mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; break; -#endif /* RE_ENABLE_I18N */ +#endif /* RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE */ case EQUIV_CLASS: *err = build_equiv_class (sbcset, #ifdef RE_ENABLE_I18N @@ -3187,11 +3212,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, #ifdef RE_ENABLE_I18N /* Ensure only single byte characters are set. */ - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) bitset_mask (sbcset, dfa->sb_char); if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes - || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->nranges || (dfa_mb_cur_max (dfa) > 1 && (mbcset->nchar_classes || mbcset->non_match))) { bin_tree_t *mbc_tree; @@ -3260,7 +3285,7 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token, int token_len, re_dfa_t *dfa, reg_syntax_t syntax, int accept_hyphen) { -#ifdef RE_ENABLE_I18N +#if defined RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE int cur_char_size; cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); if (cur_char_size > 1) @@ -3270,7 +3295,7 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, re_string_skip_bytes (regexp, cur_char_size); return REG_NOERROR; } -#endif /* RE_ENABLE_I18N */ +#endif /* RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE */ re_string_skip_bytes (regexp, token_len); /* Skip a token. 
*/ if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS || token->type == OP_OPEN_EQUIV_CLASS) @@ -3350,7 +3375,9 @@ build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, build_equiv_class (bitset_t sbcset, const unsigned char *name) #endif /* not RE_ENABLE_I18N */ { -#ifdef _LIBC + /* When __OPTION_EGLIBC_LOCALE_CODE is disabled, only the C locale + is supported; it has no collation rules. */ +#if defined _LIBC && __OPTION_EGLIBC_LOCALE_CODE uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) { @@ -3423,7 +3450,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else -#endif /* _LIBC */ +#endif /* _LIBC && __OPTION_EGLIBC_LOCALE_CODE */ { if (BE (strlen ((const char *) name) != 1, 0)) return REG_ECOLLATE; @@ -3457,7 +3484,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) name = "alpha"; -#ifdef RE_ENABLE_I18N +#if defined RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE /* Check the space of the arrays. */ if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) { @@ -3473,7 +3500,7 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, *char_class_alloc = new_char_class_alloc; } mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); -#endif /* RE_ENABLE_I18N */ +#endif /* RE_ENABLE_I18N && __OPTION_EGLIBC_LOCALE_CODE */ #define BUILD_CHARCLASS_LOOP(ctype_func) \ do { \ @@ -3584,7 +3611,7 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, #ifdef RE_ENABLE_I18N /* Ensure only single byte characters are set. 
*/ - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) bitset_mask (sbcset, dfa->sb_char); #endif @@ -3596,7 +3623,7 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, goto build_word_op_espace; #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) { bin_tree_t *mbc_tree; /* Build a tree for complex bracket. */ diff --git a/libc/posix/regex_internal.c b/libc/posix/regex_internal.c index 66154e0ce..08c8cca24 100644 --- a/libc/posix/regex_internal.c +++ b/libc/posix/regex_internal.c @@ -44,8 +44,8 @@ re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, int init_buf_len; /* Ensure at least one character fits into the buffers. */ - if (init_len < dfa->mb_cur_max) - init_len = dfa->mb_cur_max; + if (init_len < dfa_mb_cur_max (dfa)) + init_len = dfa_mb_cur_max (dfa); init_buf_len = (len + 1 < init_len) ? len + 1: init_len; re_string_construct_common (str, len, pstr, trans, icase, dfa); @@ -56,7 +56,7 @@ re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, pstr->word_char = dfa->word_char; pstr->word_ops_used = dfa->word_ops_used; pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; - pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; + pstr->valid_len = (pstr->mbs_allocated || dfa_mb_cur_max (dfa) > 1) ? 
0 : len; pstr->valid_raw_len = pstr->valid_len; return REG_NOERROR; } @@ -83,7 +83,7 @@ re_string_construct (re_string_t *pstr, const char *str, int len, if (icase) { #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) { while (1) { @@ -92,7 +92,7 @@ re_string_construct (re_string_t *pstr, const char *str, int len, return ret; if (pstr->valid_raw_len >= len) break; - if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) + if (pstr->bufs_len > pstr->valid_len + dfa_mb_cur_max (dfa)) break; ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); if (BE (ret != REG_NOERROR, 0)) @@ -106,7 +106,7 @@ re_string_construct (re_string_t *pstr, const char *str, int len, else { #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ @@ -131,7 +131,7 @@ internal_function re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) { #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) + if (string_mb_cur_max (pstr) > 1) { wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); if (BE (new_wcs == NULL, 0)) @@ -171,7 +171,7 @@ re_string_construct_common (const char *str, int len, re_string_t *pstr, pstr->trans = trans; pstr->icase = icase ? 
1 : 0; pstr->mbs_allocated = (trans != NULL || icase); - pstr->mb_cur_max = dfa->mb_cur_max; + pstr->mb_cur_max = dfa_mb_cur_max (dfa); pstr->is_utf8 = dfa->is_utf8; pstr->map_notascii = dfa->map_notascii; pstr->stop = pstr->len; @@ -197,7 +197,7 @@ build_wcs_buffer (re_string_t *pstr) { #ifdef _LIBC unsigned char buf[MB_LEN_MAX]; - assert (MB_LEN_MAX >= pstr->mb_cur_max); + assert (MB_LEN_MAX >= string_mb_cur_max (pstr)); #else unsigned char buf[64]; #endif @@ -220,7 +220,7 @@ build_wcs_buffer (re_string_t *pstr) { int i, ch; - for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + for (i = 0; i < string_mb_cur_max (pstr) && i < remain_len; ++i) { ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; @@ -268,7 +268,7 @@ build_wcs_upper_buffer (re_string_t *pstr) size_t mbclen; #ifdef _LIBC char buf[MB_LEN_MAX]; - assert (MB_LEN_MAX >= pstr->mb_cur_max); + assert (MB_LEN_MAX >= string_mb_cur_max (pstr)); #else char buf[64]; #endif @@ -360,7 +360,7 @@ build_wcs_upper_buffer (re_string_t *pstr) { int i, ch; - for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + for (i = 0; i < string_mb_cur_max (pstr) && i < remain_len; ++i) { ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; buf[i] = pstr->trans[ch]; @@ -555,8 +555,9 @@ re_string_translate_buffer (re_string_t *pstr) } /* This function re-construct the buffers. - Concretely, convert to wide character in case of pstr->mb_cur_max > 1, - convert to upper case in case of REG_ICASE, apply translation. */ + Concretely, convert to wide character in case of + string_mb_cur_max (pstr) > 1, convert to upper case in case of + REG_ICASE, apply translation. */ static reg_errcode_t internal_function @@ -567,7 +568,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) { /* Reset buffer. 
*/ #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) + if (string_mb_cur_max (pstr) > 1) memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); #endif /* RE_ENABLE_I18N */ pstr->len = pstr->raw_len; @@ -658,7 +659,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags); #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) + if (string_mb_cur_max (pstr) > 1) memmove (pstr->wcs, pstr->wcs + offset, (pstr->valid_len - offset) * sizeof (wint_t)); #endif /* RE_ENABLE_I18N */ @@ -687,7 +688,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) #endif pstr->valid_len = 0; #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) + if (string_mb_cur_max (pstr) > 1) { int wcs_idx; wint_t wc = WEOF; @@ -699,7 +700,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) /* Special case UTF-8. Multi-byte chars start with any byte other than 0x80 - 0xbf. */ raw = pstr->raw_mbs + pstr->raw_mbs_idx; - end = raw + (offset - pstr->mb_cur_max); + end = raw + (offset - string_mb_cur_max (pstr)); if (end < pstr->raw_mbs) end = pstr->raw_mbs; p = raw + offset - 1; @@ -791,7 +792,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) /* Then build the buffers. */ #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) + if (string_mb_cur_max (pstr) > 1) { if (pstr->icase) { @@ -829,7 +830,7 @@ re_string_peek_byte_case (const re_string_t *pstr, int idx) return re_string_peek_byte (pstr, idx); #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1 + if (string_mb_cur_max (pstr) > 1 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) return re_string_peek_byte (pstr, idx); #endif @@ -918,7 +919,7 @@ re_string_context_at (const re_string_t *input, int idx, int eflags) return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF : CONTEXT_NEWLINE | CONTEXT_ENDBUF); #ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) + if (string_mb_cur_max (input) > 1) { wint_t wc; int wc_idx = idx; @@ -1429,7 +1430,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) dfa->nodes[dfa->nodes_len].constraint = 0; #ifdef RE_ENABLE_I18N dfa->nodes[dfa->nodes_len].accept_mb = - (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; + (type == OP_PERIOD && dfa_mb_cur_max (dfa) > 1) || type == COMPLEX_BRACKET; #endif dfa->nexts[dfa->nodes_len] = -1; re_node_set_init_empty (dfa->edests + dfa->nodes_len); diff --git a/libc/posix/regex_internal.h b/libc/posix/regex_internal.h index 24d0fbba7..bbd690a17 100644 --- a/libc/posix/regex_internal.h +++ b/libc/posix/regex_internal.h @@ -27,6 +27,10 @@ #include <stdlib.h> #include <string.h> +#if defined _LIBC +# include <gnu/option-groups.h> +#endif + #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC # include <langinfo.h> #endif @@ -373,6 +377,13 @@ struct re_string_t }; typedef struct re_string_t re_string_t; +/* When OPTION_EGLIBC_LOCALE_CODE is disabled, this is always 1; + help the compiler make use of that fact. */ +#if __OPTION_EGLIBC_LOCALE_CODE +# define string_mb_cur_max(str) ((str)->mb_cur_max + 0) +#else +# define string_mb_cur_max(str) (1) +#endif struct re_dfa_t; typedef struct re_dfa_t re_dfa_t; @@ -657,6 +668,14 @@ struct re_dfa_t __libc_lock_define (, lock) }; +/* When OPTION_EGLIBC_LOCALE_CODE is disabled, this is always 1; + help the compiler make use of that fact. 
*/ +#if __OPTION_EGLIBC_LOCALE_CODE +# define dfa_mb_cur_max(dfa) ((dfa)->mb_cur_max + 0) +#else +# define dfa_mb_cur_max(dfa) (1) +#endif + #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) #define re_node_set_remove(set,id) \ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) @@ -717,7 +736,7 @@ internal_function __attribute ((pure)) re_string_char_size_at (const re_string_t *pstr, int idx) { int byte_idx; - if (pstr->mb_cur_max == 1) + if (string_mb_cur_max (pstr) == 1) return 1; for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) if (pstr->wcs[idx + byte_idx] != WEOF) @@ -729,7 +748,7 @@ static inline wint_t internal_function __attribute ((pure)) re_string_wchar_at (const re_string_t *pstr, int idx) { - if (pstr->mb_cur_max == 1) + if (string_mb_cur_max (pstr) == 1) return (wint_t) pstr->mbs[idx]; return (wint_t) pstr->wcs[idx]; } diff --git a/libc/posix/regexec.c b/libc/posix/regexec.c index 135efe744..b635e98eb 100644 --- a/libc/posix/regexec.c +++ b/libc/posix/regexec.c @@ -185,11 +185,11 @@ static int build_trtable (const re_dfa_t *dfa, static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, const re_string_t *input, int idx) internal_function; -# ifdef _LIBC +# if defined _LIBC && __OPTION_EGLIBC_LOCALE_CODE static unsigned int find_collation_sequence_value (const unsigned char *mbs, size_t name_len) internal_function; -# endif /* _LIBC */ +# endif /* _LIBC && __OPTION_EGLIBC_LOCALE_CODE */ #endif /* RE_ENABLE_I18N */ static int group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, @@ -711,7 +711,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, incr = (range < 0) ? -1 : 1; left_lim = (range < 0) ? start + range : start; right_lim = (range < 0) ? start : start + range; - sb = dfa->mb_cur_max == 1; + sb = dfa_mb_cur_max (dfa) == 1; match_kind = (fastmap ? ((sb || !(preg->syntax & RE_ICASE || t) ? 
4 : 0) @@ -3405,7 +3405,7 @@ out_free: if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) goto out_free; - if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) + if (dest_states[i] != dest_states_word[i] && dfa_mb_cur_max (dfa) > 1) need_word_trtable = 1; dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, @@ -3547,7 +3547,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, else if (type == OP_PERIOD) { #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) bitset_merge (accepts, dfa->sb_char); else #endif @@ -3598,7 +3598,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, continue; } #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); else @@ -3617,7 +3617,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, continue; } #ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) + if (dfa_mb_cur_max (dfa) > 1) for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); else @@ -3789,12 +3789,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, if (node->type == COMPLEX_BRACKET) { const re_charset_t *cset = node->opr.mbcset; -# ifdef _LIBC - const unsigned char *pin - = ((const unsigned char *) re_string_get_buffer (input) + str_idx); - int j; - uint32_t nrules; -# endif /* _LIBC */ int match_len = 0; wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) ? re_string_wchar_at (input, str_idx) : 0); @@ -3806,6 +3800,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, match_len = char_len; goto check_node_accept_bytes_match; } +#if __OPTION_EGLIBC_LOCALE_CODE /* match with character_class? 
*/ for (i = 0; i < cset->nchar_classes; ++i) { @@ -3816,8 +3811,16 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, goto check_node_accept_bytes_match; } } +#endif + + /* When __OPTION_EGLIBC_LOCALE_CODE is disabled, only the C + locale is supported; it has no collation rules. */ +# if defined _LIBC && __OPTION_EGLIBC_LOCALE_CODE + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; -# ifdef _LIBC nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) { @@ -3910,8 +3913,12 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, } } else -# endif /* _LIBC */ +# endif /* _LIBC && __OPTION_EGLIBC_LOCALE_CODE */ { + /* In the _LIBC version, if OPTION_EGLIBC_LOCALE_CODE is + disabled, there can be no multibyte range endpoints, and + cset->nranges is always zero. */ +#if __OPTION_EGLIBC_LOCALE_CODE /* match with range expression? */ #if __GNUC__ >= 2 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; @@ -3930,6 +3937,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, goto check_node_accept_bytes_match; } } +#endif /* __OPTION_EGLIBC_LOCALE_CODE */ } check_node_accept_bytes_match: if (!cset->non_match) @@ -3945,7 +3953,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, return 0; } -# ifdef _LIBC +# if defined _LIBC && __OPTION_EGLIBC_LOCALE_CODE static unsigned int internal_function find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) @@ -4003,7 +4011,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) return UINT_MAX; } } -# endif /* _LIBC */ +# endif /* _LIBC && __OPTION_EGLIBC_LOCALE_CODE */ #endif /* RE_ENABLE_I18N */ /* Check whether the node accepts the byte which is IDX-th @@ -4088,7 +4096,7 @@ extend_buffers (re_match_context_t *mctx) if (pstr->icase) { #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) + if (string_mb_cur_max (pstr) > 1) { ret = build_wcs_upper_buffer 
(pstr); if (BE (ret != REG_NOERROR, 0)) @@ -4101,7 +4109,7 @@ extend_buffers (re_match_context_t *mctx) else { #ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) + if (string_mb_cur_max (pstr) > 1) build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ diff --git a/libc/stdio-common/Makefile b/libc/stdio-common/Makefile index f068269d7..acfabac16 100644 --- a/libc/stdio-common/Makefile +++ b/libc/stdio-common/Makefile @@ -56,12 +56,12 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \ scanf1 scanf2 scanf3 scanf4 scanf5 scanf7 scanf8 scanf9 scanf10 \ scanf11 scanf12 tst-tmpnam tst-cookie tst-obprintf \ tst-fseek tst-fmemopen tst-gets \ - tst-sprintf tst-rndseek tst-fdopen tst-fphex bug14 \ + tst-sprintf tst-rndseek tst-fdopen tst-fphex \ tst-popen tst-unlockedio tst-fmemopen2 tst-put-error tst-fgets \ tst-fwrite bug16 bug17 tst-swscanf tst-sprintf2 bug18 bug18a \ - bug19 tst-popen2 scanf13 scanf14 scanf15 + bug19 tst-popen2 scanf14 scanf15 tests-$(OPTION_EGLIBC_LOCALE_CODE) \ - += tst-sscanf tst-swprintf bug15 test-vfprintf + += tst-sscanf tst-swprintf bug15 test-vfprintf bug14 scanf13 tests-$(OPTION_POSIX_WIDE_CHAR_DEVICE_IO) \ += tst-perror bug19a bug20 diff --git a/libc/stdio-common/_i18n_number.h b/libc/stdio-common/_i18n_number.h index 04d6619b4..bd6adf80d 100644 --- a/libc/stdio-common/_i18n_number.h +++ b/libc/stdio-common/_i18n_number.h @@ -19,10 +19,13 @@ #include <wchar.h> #include <wctype.h> +#include <gnu/option-groups.h> #include "../locale/outdigits.h" #include "../locale/outdigitswc.h" +#if __OPTION_EGLIBC_LOCALE_CODE + static CHAR_T * _i18n_number_rewrite (CHAR_T *w, CHAR_T *rear_ptr) { @@ -93,3 +96,13 @@ _i18n_number_rewrite (CHAR_T *w, CHAR_T *rear_ptr) return w; } + +#else + +static CHAR_T * +_i18n_number_rewrite (CHAR_T *w, CHAR_T *rear_ptr) +{ + return w; +} + +#endif diff --git a/libc/stdio-common/vfprintf.c b/libc/stdio-common/vfprintf.c index d1dc1aaf5..22f92ce77 100644 --- a/libc/stdio-common/vfprintf.c +++ 
b/libc/stdio-common/vfprintf.c @@ -31,6 +31,7 @@ #include "_itoa.h" #include <locale/localeinfo.h> #include <stdio.h> +#include <gnu/option-groups.h> /* This code is shared between the standard stdio implementation found in GNU C library and the libio implementation originally found in @@ -106,6 +107,12 @@ # define EOF WEOF #endif +#if __OPTION_EGLIBC_LOCALE_CODE +# define LOCALE_SUPPORT (1) +#else +# define LOCALE_SUPPORT (0) +#endif + #include "_i18n_number.h" /* Include the shared code for parsing the format string. */ @@ -1151,7 +1158,8 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap) /* Search for the end of the string, but don't search past \ the length (in bytes) specified by the precision. Also \ don't use incomplete characters. */ \ - if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MB_CUR_MAX) == 1) \ + if (! LOCALE_SUPPORT \ + ||_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MB_CUR_MAX) == 1) \ len = __strnlen (string, prec); \ else \ { \ @@ -1381,7 +1389,9 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap) LABEL (flag_quote): group = 1; - if (grouping == (const char *) -1) + if (! LOCALE_SUPPORT) + grouping = NULL; + else if (grouping == (const char *) -1) { #ifdef COMPILE_WPRINTF thousands_sep = _NL_CURRENT_WORD (LC_NUMERIC, @@ -1622,7 +1632,9 @@ do_positional: free (workstart); workstart = NULL; - if (grouping == (const char *) -1) + if (! 
LOCALE_SUPPORT) + grouping = NULL; + else if (grouping == (const char *) -1) { #ifdef COMPILE_WPRINTF thousands_sep = _NL_CURRENT_WORD (LC_NUMERIC, diff --git a/libc/stdio-common/vfscanf.c b/libc/stdio-common/vfscanf.c index f550109a9..e89e23d62 100644 --- a/libc/stdio-common/vfscanf.c +++ b/libc/stdio-common/vfscanf.c @@ -29,6 +29,7 @@ #include <wctype.h> #include <bits/libc-lock.h> #include <locale/localeinfo.h> +#include <gnu/option-groups.h> #ifdef __GNUC__ # define HAVE_LONGLONG @@ -293,24 +294,35 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr, ARGCHECK (s, format); { -#ifndef COMPILE_WSCANF +#if __OPTION_EGLIBC_LOCALE_CODE && !defined (COMPILE_WSCANF) struct locale_data *const curnumeric = loc->__locales[LC_NUMERIC]; #endif +#if __OPTION_EGLIBC_LOCALE_CODE /* Figure out the decimal point character. */ -#ifdef COMPILE_WSCANF +# ifdef COMPILE_WSCANF decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC); -#else +# else decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string; -#endif +# endif /* Figure out the thousands separator character. */ -#ifdef COMPILE_WSCANF +# ifdef COMPILE_WSCANF thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC); -#else +# else thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string; if (*thousands == '\0') thousands = NULL; -#endif +# endif +#else /* if ! __OPTION_EGLIBC_LOCALE_CODE */ + /* Hard-code values from the C locale. */ +# ifdef COMPILE_WSCANF + decimal = L'.'; + thousands = L'\0'; +# else + decimal = "."; + thousands = NULL; +# endif +#endif /* __OPTION_EGLIBC_LOCALE_CODE */ } /* Lock the stream. */ @@ -1365,10 +1377,17 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr, const char *mbdigits[10]; const char *mbdigits_extended[10]; #endif +#if __OPTION_EGLIBC_LOCALE_CODE /* "to_inpunct" is a map from ASCII digits to their equivalent in locale. This is defined for locales which use an extra digits set. 
*/ wctrans_t map = __wctrans ("to_inpunct"); +#else + /* This will always be the case when + OPTION_EGLIBC_LOCALE_CODE is disabled, but the + compiler can't figure that out. */ + wctrans_t map = NULL; +#endif int n; from_level = 0; @@ -2026,6 +2045,7 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr, --width; } +#if __OPTION_EGLIBC_LOCALE_CODE wctrans_t map; if (__builtin_expect ((flags & I18N) != 0, 0) /* Hexadecimal floats make no sense, fixing localized @@ -2242,6 +2262,7 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr, ; #endif } +#endif /* __OPTION_EGLIBC_LOCALE_CODE */ /* Have we read any character? If we try to read a number in hexadecimal notation and we have read only the `0x' diff --git a/libc/stdlib/Makefile b/libc/stdlib/Makefile index 28097c1d3..47c5a9429 100644 --- a/libc/stdlib/Makefile +++ b/libc/stdlib/Makefile @@ -73,11 +73,11 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ test-canon test-canon2 tst-strtoll tst-environ \ tst-xpg-basename tst-random tst-random2 tst-bsearch \ tst-limits tst-rand48 bug-strtod tst-setcontext \ - test-a64l tst-qsort tst-system testmb2 bug-strtod2 \ + test-a64l tst-qsort tst-system bug-strtod2 \ tst-atof1 tst-atof2 tst-strtod2 tst-rand48-2 \ - tst-makecontext tst-strtod4 tst-strtod5 tst-qsort2 + tst-makecontext tst-qsort2 tests-$(OPTION_EGLIBC_LOCALE_CODE) \ - += tst-strtod3 + += tst-strtod3 tst-strtod4 tst-strtod5 testmb2 include ../Makeconfig ifeq ($(build-shared),yes) diff --git a/libc/stdlib/strtod_l.c b/libc/stdlib/strtod_l.c index 86b408e1f..4f43f3853 100644 --- a/libc/stdlib/strtod_l.c +++ b/libc/stdlib/strtod_l.c @@ -19,6 +19,7 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +#include <gnu/option-groups.h> #include <xlocale.h> extern double ____strtod_l_internal (const char *, char **, int, __locale_t); @@ -480,6 +481,7 @@ ____STRTOF_INTERNAL (nptr, endptr, group, loc) /* Used in several places. 
*/ int cnt; +#if __OPTION_EGLIBC_LOCALE_CODE struct locale_data *current = loc->__locales[LC_NUMERIC]; if (__builtin_expect (group, 0)) @@ -518,6 +520,17 @@ ____STRTOF_INTERNAL (nptr, endptr, group, loc) decimal_len = strlen (decimal); assert (decimal_len > 0); #endif +#else /* if ! __OPTION_EGLIBC_LOCALE_CODE */ + /* Hard-code values from the 'C' locale. */ + grouping = NULL; +#ifdef USE_WIDE_CHAR + decimal = L'.'; +# define decimal_len 1 +#else + decimal = "."; + decimal_len = 1; +#endif +#endif /* __OPTION_EGLIBC_LOCALE_CODE */ /* Prepare number representation. */ exponent = 0; diff --git a/libc/stdlib/tst-strtod.c b/libc/stdlib/tst-strtod.c index 628e40ca3..e97a05701 100644 --- a/libc/stdlib/tst-strtod.c +++ b/libc/stdlib/tst-strtod.c @@ -24,6 +24,7 @@ #include <errno.h> #include <string.h> #include <math.h> +#include <gnu/option-groups.h> struct ltest { @@ -174,7 +175,9 @@ main (int argc, char ** argv) status |= long_dbl (); +#if __OPTION_EGLIBC_LOCALE_CODE status |= locale_test (); +#endif return status ? EXIT_FAILURE : EXIT_SUCCESS; } @@ -217,6 +220,7 @@ long_dbl (void) return 0; } +#if __OPTION_EGLIBC_LOCALE_CODE /* Perform a few tests in a locale with thousands separators. 
*/ static int locale_test (void) @@ -274,3 +278,4 @@ locale_test (void) return result; } +#endif /* __OPTION_EGLIBC_LOCALE_CODE */ diff --git a/libc/string/strcoll_l.c b/libc/string/strcoll_l.c index 8bd84b10a..92c5c69f1 100644 --- a/libc/string/strcoll_l.c +++ b/libc/string/strcoll_l.c @@ -25,6 +25,7 @@ #include <stdint.h> #include <stdlib.h> #include <string.h> +#include <gnu/option-groups.h> #ifndef STRING_TYPE # define STRING_TYPE char @@ -49,7 +50,11 @@ STRCOLL (s1, s2, l) __locale_t l; { struct locale_data *current = l->__locales[LC_COLLATE]; +#if __OPTION_EGLIBC_LOCALE_CODE uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; +#else + const uint_fast32_t nrules = 0; +#endif /* We don't assign the following values right away since it might be unnecessary in case there are no rules. */ const unsigned char *rulesets; diff --git a/libc/string/strxfrm_l.c b/libc/string/strxfrm_l.c index 20f2f149b..33070e563 100644 --- a/libc/string/strxfrm_l.c +++ b/libc/string/strxfrm_l.c @@ -26,6 +26,7 @@ #include <stdlib.h> #include <string.h> #include <sys/param.h> +#include <gnu/option-groups.h> #ifndef STRING_TYPE # define STRING_TYPE char @@ -87,7 +88,11 @@ size_t STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) { struct locale_data *current = l->__locales[LC_COLLATE]; +#if __OPTION_EGLIBC_LOCALE_CODE uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; +#else + const uint_fast32_t nrules = 0; +#endif /* We don't assign the following values right away since it might be unnecessary in case there are no rules. 
*/ const unsigned char *rulesets; diff --git a/libc/string/tst-strxfrm.c b/libc/string/tst-strxfrm.c index 2ae2e2952..42cdd3796 100644 --- a/libc/string/tst-strxfrm.c +++ b/libc/string/tst-strxfrm.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <gnu/option-groups.h> char const string[] = ""; @@ -64,8 +65,10 @@ main (void) int result = 0; result |= test ("C"); +#if __OPTION_EGLIBC_LOCALE_CODE result |= test ("en_US.ISO-8859-1"); result |= test ("de_DE.UTF-8"); +#endif return result; } diff --git a/libc/string/tst-strxfrm2.c b/libc/string/tst-strxfrm2.c index d5a111533..19c7f307a 100644 --- a/libc/string/tst-strxfrm2.c +++ b/libc/string/tst-strxfrm2.c @@ -1,6 +1,7 @@ #include <locale.h> #include <stdio.h> #include <string.h> +#include <gnu/option-groups.h> static int do_test (void) @@ -38,6 +39,7 @@ do_test (void) res = 1; } +#if __OPTION_EGLIBC_LOCALE_CODE if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL) { puts ("setlocale failed"); @@ -75,6 +77,7 @@ do_test (void) res = 1; } } +#endif return res; } diff --git a/libc/time/Makefile b/libc/time/Makefile index 78e127da8..407ab2d16 100644 --- a/libc/time/Makefile +++ b/libc/time/Makefile @@ -36,11 +36,11 @@ aux-$(OPTION_EGLIBC_LOCALE_CODE) += alt_digit era lc-time-cleanup distribute := datemsk tests := test_time clocktest tst-posixtz \ - tst-getdate tst-mktime tst-mktime2 tst-ftime_l tst-strftime \ + tst-getdate tst-mktime tst-mktime2 tst-strftime \ tst-mktime3 tst-strptime2 bug-asctime bug-asctime_r bug-mktime1 \ tst-strptime3 tests-$(OPTION_EGLIBC_LOCALE_CODE) \ - += tst-strptime + += tst-strptime tst-ftime_l tests-$(OPTION_POSIX_WIDE_CHAR_DEVICE_IO) \ += tst_wcsftime |