py/unicode: Clean up utf8 funcs and provide non-utf8 inline versions.

This patch provides inline versions of the UTF-8 helper functions for the case when Unicode support is disabled (MICROPY_PY_BUILTINS_STR_UNICODE set to 0), which saves code size. The unichar_charlen function is also renamed to utf8_charlen to match the other UTF-8 helper functions, and its signature is adjusted for consistency (const char* -> const byte*, mp_uint_t -> size_t).
author: Damien George <damien.p.george@gmail.com> 2018-02-14 18:19:22 +1100
committer: Damien George <damien.p.george@gmail.com> 2018-02-14 18:19:22 +1100
commit: 19aee9438a7a8cf8539536dab5147aedb6b16bb3 (patch)
tree: c2300b20c7915eb222b3c20fcb61720aa5a78ef3 /py/unicode.c
parent: 49e0dd54e650295fcb46b2a47ae08f369e5cfdac (diff)
1 file changed, 11 insertions, 18 deletions
diff --git a/py/unicode.c b/py/unicode.c
index 140b7ba71..935dc9012 100644
--- a/py/unicode.c
+++ b/py/unicode.c
@@ -67,9 +67,9 @@ STATIC const uint8_t attr[] = {
     AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0
 };
 
-// TODO: Rename to str_get_char
-unichar utf8_get_char(const byte *s) {
 #if MICROPY_PY_BUILTINS_STR_UNICODE
+
+unichar utf8_get_char(const byte *s) {
     unichar ord = *s++;
     if (!UTF8_IS_NONASCII(ord)) return ord;
     ord &= 0x7F;
@@ -80,22 +80,14 @@ unichar utf8_get_char(const byte *s) {
         ord = (ord << 6) | (*s++ & 0x3F);
     }
     return ord;
-#else
-    return *s;
-#endif
 }
 
-// TODO: Rename to str_next_char
 const byte *utf8_next_char(const byte *s) {
-#if MICROPY_PY_BUILTINS_STR_UNICODE
     ++s;
     while (UTF8_IS_CONT(*s)) {
         ++s;
     }
     return s;
-#else
-    return s + 1;
-#endif
 }
 
 mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) {
@@ -109,21 +101,18 @@ mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) {
     return i;
 }
 
-// TODO: Rename to str_charlen
-mp_uint_t unichar_charlen(const char *str, mp_uint_t len) {
-#if MICROPY_PY_BUILTINS_STR_UNICODE
-    mp_uint_t charlen = 0;
-    for (const char *top = str + len; str < top; ++str) {
+size_t utf8_charlen(const byte *str, size_t len) {
+    size_t charlen = 0;
+    for (const byte *top = str + len; str < top; ++str) {
         if (!UTF8_IS_CONT(*str)) {
             ++charlen;
         }
     }
     return charlen;
-#else
-    return len;
-#endif
 }
 
+#endif
+
 // Be aware: These unichar_is* functions are actually ASCII-only!
 bool unichar_isspace(unichar c) {
     return c < 128 && (attr[c] & FL_SPACE) != 0;
@@ -183,6 +172,8 @@ mp_uint_t unichar_xdigit_value(unichar c) {
     return n;
 }
 
+#if MICROPY_PY_BUILTINS_STR_UNICODE
+
 bool utf8_check(const byte *p, size_t len) {
     uint8_t need = 0;
     const byte *end = p + len;
@@ -210,3 +201,5 @@ bool utf8_check(const byte *p, size_t len) {
     }
     return need == 0; // no pending fragments allowed
 }
+
+#endif
author	Damien George <damien.p.george@gmail.com>	2018-02-14 18:19:22 +1100
committer	Damien George <damien.p.george@gmail.com>	2018-02-14 18:19:22 +1100
commit	19aee9438a7a8cf8539536dab5147aedb6b16bb3 (patch)
tree	c2300b20c7915eb222b3c20fcb61720aa5a78ef3 /py/unicode.c
parent	49e0dd54e650295fcb46b2a47ae08f369e5cfdac (diff)