diff options
author | Damien George <damien.p.george@gmail.com> | 2018-02-14 18:19:22 +1100 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2018-02-14 18:19:22 +1100 |
commit | 19aee9438a7a8cf8539536dab5147aedb6b16bb3 (patch) | |
tree | c2300b20c7915eb222b3c20fcb61720aa5a78ef3 /py/unicode.c | |
parent | 49e0dd54e650295fcb46b2a47ae08f369e5cfdac (diff) |
py/unicode: Clean up utf8 funcs and provide non-utf8 inline versions.
This patch provides inline versions of the utf8 helper functions for the
case when unicode is disabled (MICROPY_PY_BUILTINS_STR_UNICODE set to 0).
This saves code size.
The unichar_charlen function is also renamed to utf8_charlen to match the
other utf8 helper functions, and the signature of this function is adjusted
for consistency (const char* -> const byte*, mp_uint_t -> size_t).
Diffstat (limited to 'py/unicode.c')
-rw-r--r-- | py/unicode.c | 29 |
1 files changed, 11 insertions, 18 deletions
diff --git a/py/unicode.c b/py/unicode.c index 140b7ba71..935dc9012 100644 --- a/py/unicode.c +++ b/py/unicode.c @@ -67,9 +67,9 @@ STATIC const uint8_t attr[] = { AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0 }; -// TODO: Rename to str_get_char -unichar utf8_get_char(const byte *s) { #if MICROPY_PY_BUILTINS_STR_UNICODE + +unichar utf8_get_char(const byte *s) { unichar ord = *s++; if (!UTF8_IS_NONASCII(ord)) return ord; ord &= 0x7F; @@ -80,22 +80,14 @@ unichar utf8_get_char(const byte *s) { ord = (ord << 6) | (*s++ & 0x3F); } return ord; -#else - return *s; -#endif } -// TODO: Rename to str_next_char const byte *utf8_next_char(const byte *s) { -#if MICROPY_PY_BUILTINS_STR_UNICODE ++s; while (UTF8_IS_CONT(*s)) { ++s; } return s; -#else - return s + 1; -#endif } mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) { @@ -109,21 +101,18 @@ mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) { return i; } -// TODO: Rename to str_charlen -mp_uint_t unichar_charlen(const char *str, mp_uint_t len) { -#if MICROPY_PY_BUILTINS_STR_UNICODE - mp_uint_t charlen = 0; - for (const char *top = str + len; str < top; ++str) { +size_t utf8_charlen(const byte *str, size_t len) { + size_t charlen = 0; + for (const byte *top = str + len; str < top; ++str) { if (!UTF8_IS_CONT(*str)) { ++charlen; } } return charlen; -#else - return len; -#endif } +#endif + // Be aware: These unichar_is* functions are actually ASCII-only! bool unichar_isspace(unichar c) { return c < 128 && (attr[c] & FL_SPACE) != 0; @@ -183,6 +172,8 @@ mp_uint_t unichar_xdigit_value(unichar c) { return n; } +#if MICROPY_PY_BUILTINS_STR_UNICODE + bool utf8_check(const byte *p, size_t len) { uint8_t need = 0; const byte *end = p + len; @@ -210,3 +201,5 @@ bool utf8_check(const byte *p, size_t len) { } return need == 0; // no pending fragments allowed } + +#endif |