diff options
author | Artyom Skrobov <tyomitch@gmail.com> | 2021-05-03 14:17:36 -0400 |
---|---|---|
committer | Damien George <damien@micropython.org> | 2022-02-11 22:52:32 +1100 |
commit | 18b1ba086c0e5547ca81030bf13b026961f80720 (patch) | |
tree | 2eda1e61c4c61053b6ddd2f1c53e434854dadbd0 /py/qstr.c | |
parent | e8bc4a3a5be12ad639b811852337c63e8f1d6277 (diff) |
py/qstr: Separate hash and len from string data.
This allows the compiler to merge strings: e.g. "update",
"difference_update" and "symmetric_difference_update" will all point to the
same memory.
No functional change.
The size reduction depends on the number of qstrs in the build. The change
this commit brings is:
bare-arm: -4 -0.007%
minimal x86: +150 +0.092% [incl +48(data)]
unix x64: -608 -0.118%
unix nanbox: -572 -0.126% [incl +32(data)]
stm32: -1392 -0.352% PYBV10
cc3200: -448 -0.244%
esp8266: -1208 -0.173% GENERIC
esp32: -1028 -0.068% GENERIC[incl -1020(data)]
nrf: -440 -0.252% pca10040
rp2: -1072 -0.217% PICO
samd: -368 -0.264% ADAFRUIT_ITSYBITSY_M4_EXPRESS
Performance is also improved (on bare metal at least) for the
core_import_mpy_multi.py, core_import_mpy_single.py and core_qstr.py
performance benchmarks.
Originally at adafruit#4583
Signed-off-by: Artyom Skrobov <tyomitch@gmail.com>
Diffstat (limited to 'py/qstr.c')
-rw-r--r-- | py/qstr.c | 134 |
1 files changed, 68 insertions, 66 deletions
@@ -35,7 +35,6 @@ // NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings) // ultimately we will replace this with a static hash table of some kind -// also probably need to include the length in the string data, to allow null bytes in the string #if MICROPY_DEBUG_VERBOSE // print debugging info #define DEBUG_printf DEBUG_printf @@ -44,34 +43,9 @@ #endif // A qstr is an index into the qstr pool. -// The data for a qstr contains (hash, length, data): -// - hash (configurable number of bytes) -// - length (configurable number of bytes) -// - data ("length" number of bytes) -// - \0 terminated (so they can be printed using printf) - -#if MICROPY_QSTR_BYTES_IN_HASH == 1 - #define Q_HASH_MASK (0xff) - #define Q_GET_HASH(q) ((mp_uint_t)(q)[0]) - #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0) -#elif MICROPY_QSTR_BYTES_IN_HASH == 2 - #define Q_HASH_MASK (0xffff) - #define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8)) - #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0) -#else - #error unimplemented qstr hash decoding -#endif -#define Q_GET_ALLOC(q) (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1) -#define Q_GET_DATA(q) ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN) -#if MICROPY_QSTR_BYTES_IN_LEN == 1 - #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH]) - #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0) -#elif MICROPY_QSTR_BYTES_IN_LEN == 2 - #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8)) - #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0) -#else - #error unimplemented qstr length decoding -#endif +// The data for a qstr is \0 terminated (so they can be printed using printf) + +#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1) #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL #define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1) @@ -100,14 +74,32 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) { return hash; } +const qstr_hash_t mp_qstr_const_hashes[] = { + #ifndef NO_QSTR +#define QDEF(id, hash, len, str) hash, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF + #endif +}; + +const qstr_len_t mp_qstr_const_lengths[] = { + #ifndef NO_QSTR +#define QDEF(id, hash, len, str) len, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF + #endif +}; + const qstr_pool_t mp_qstr_const_pool = { NULL, // no previous pool 0, // no previous pool MICROPY_ALLOC_QSTR_ENTRIES_INIT, MP_QSTRnumber_of, // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes, + (qstr_len_t *)mp_qstr_const_lengths, { #ifndef NO_QSTR -#define QDEF(id, str) str, +#define QDEF(id, hash, len, str) str, #include "genhdr/qstrdefs.generated.h" #undef QDEF #endif @@ -130,19 +122,21 @@ void qstr_init(void) { #endif } -STATIC const byte *find_qstr(qstr q) { +STATIC qstr_pool_t *find_qstr(qstr *q) { // search pool for this qstr // total_prev_len==0 in the final pool, so the loop will always terminate qstr_pool_t *pool = MP_STATE_VM(last_pool); - while (q < pool->total_prev_len) { + while (*q < pool->total_prev_len) { pool = pool->prev; } - return pool->qstrs[q - pool->total_prev_len]; + *q -= pool->total_prev_len; + assert(*q < pool->len); + return pool; } // qstr_mutex must be taken while in this function -STATIC qstr qstr_add(const byte *q_ptr) { - DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr)); +STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { + DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr); // make sure we have room in the pool for a new qstr if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) { @@ -151,7 +145,9 @@ STATIC qstr qstr_add(const byte *q_ptr) { // Put a lower bound on the allocation size in case the extra qstr pool has few entries new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc); #endif - qstr_pool_t *pool = m_new_obj_var_maybe(qstr_pool_t, const char *, new_alloc); + mp_uint_t pool_size = sizeof(qstr_pool_t) + + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * new_alloc; + qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size); if (pool == NULL) { // Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned // at garbage collection since it's reachable from a qstr_pool_t. And the caller of @@ -162,6 +158,8 @@ STATIC qstr qstr_add(const byte *q_ptr) { QSTR_EXIT(); m_malloc_fail(new_alloc); } + pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc); + pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc); pool->prev = MP_STATE_VM(last_pool); pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len; pool->alloc = new_alloc; @@ -171,10 +169,14 @@ STATIC qstr qstr_add(const byte *q_ptr) { } // add the new qstr - MP_STATE_VM(last_pool)->qstrs[MP_STATE_VM(last_pool)->len++] = q_ptr; + mp_uint_t at = MP_STATE_VM(last_pool)->len; + MP_STATE_VM(last_pool)->hashes[at] = hash; + MP_STATE_VM(last_pool)->lengths[at] = len; + MP_STATE_VM(last_pool)->qstrs[at] = q_ptr; + MP_STATE_VM(last_pool)->len++; // return id for the newly-added qstr - return MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len - 1; + return MP_STATE_VM(last_pool)->total_prev_len + at; } qstr qstr_find_strn(const char *str, size_t str_len) { @@ -183,9 +185,10 @@ qstr qstr_find_strn(const char *str, size_t str_len) { // search pools for the data for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - if (Q_GET_HASH(*q) == str_hash && Q_GET_LENGTH(*q) == str_len && memcmp(Q_GET_DATA(*q), str, str_len) == 0) { - return pool->total_prev_len + (q - pool->qstrs); + for (mp_uint_t at = 0, top = pool->len; at < top; at++) { + if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len + && memcmp(pool->qstrs[at], str, str_len) == 0) { + return pool->total_prev_len + at; } } } @@ -211,14 +214,14 @@ qstr qstr_from_strn(const char *str, size_t len) { } // compute number of bytes needed to intern this string - size_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1; + size_t n_bytes = len + 1; if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) { // not enough room at end of previously interned string so try to grow - byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false); + char *new_p = m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false); if (new_p == NULL) { // could not grow existing memory; shrink it to fit previous - (void)m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false); + (void)m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false); MP_STATE_VM(qstr_last_chunk) = NULL; } else { // could grow existing memory @@ -232,10 +235,10 @@ qstr qstr_from_strn(const char *str, size_t len) { if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) { al = MICROPY_ALLOC_QSTR_CHUNK_INIT; } - MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, al); + MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, al); if (MP_STATE_VM(qstr_last_chunk) == NULL) { // failed to allocate a large chunk so try with exact size - MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, n_bytes); + MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, n_bytes); if (MP_STATE_VM(qstr_last_chunk) == NULL) { QSTR_EXIT(); m_malloc_fail(n_bytes); @@ -247,40 +250,38 @@ qstr qstr_from_strn(const char *str, size_t len) { } // allocate memory from the chunk for this new interned string's data - byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used); + char *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used); MP_STATE_VM(qstr_last_used) += n_bytes; // store the interned strings' data mp_uint_t hash = qstr_compute_hash((const byte *)str, len); - Q_SET_HASH(q_ptr, hash); - Q_SET_LENGTH(q_ptr, len); - memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len); - q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; - q = qstr_add(q_ptr); + memcpy(q_ptr, str, len); + q_ptr[len] = '\0'; + q = qstr_add(hash, len, q_ptr); } QSTR_EXIT(); return q; } mp_uint_t qstr_hash(qstr q) { - const byte *qd = find_qstr(q); - return Q_GET_HASH(qd); + qstr_pool_t *pool = find_qstr(&q); + return pool->hashes[q]; } size_t qstr_len(qstr q) { - const byte *qd = find_qstr(q); - return Q_GET_LENGTH(qd); + qstr_pool_t *pool = find_qstr(&q); + return pool->lengths[q]; } const char *qstr_str(qstr q) { - const byte *qd = find_qstr(q); - return (const char *)Q_GET_DATA(qd); + qstr_pool_t *pool = find_qstr(&q); + return pool->qstrs[q]; } const byte *qstr_data(qstr q, size_t *len) { - const byte *qd = find_qstr(q); - *len = Q_GET_LENGTH(qd); - return Q_GET_DATA(qd); + qstr_pool_t *pool = find_qstr(&q); + *len = pool->lengths[q]; + return (byte *)pool->qstrs[q]; } void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, size_t *n_total_bytes) { @@ -292,13 +293,14 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) { *n_pool += 1; *n_qstr += pool->len; - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - *n_str_data_bytes += Q_GET_ALLOC(*q); + for (qstr_len_t *l = pool->lengths, *l_top = pool->lengths + pool->len; l < l_top; l++) { + *n_str_data_bytes += *l + 1; } #if MICROPY_ENABLE_GC *n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap #else - *n_total_bytes += sizeof(qstr_pool_t) + sizeof(qstr) * pool->alloc; + *n_total_bytes += sizeof(qstr_pool_t) + + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * pool->alloc; #endif } *n_total_bytes += *n_str_data_bytes; @@ -309,8 +311,8 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si void qstr_dump_data(void) { QSTR_ENTER(); for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) { - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - mp_printf(&mp_plat_print, "Q(%s)\n", Q_GET_DATA(*q)); + for (const char **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { + mp_printf(&mp_plat_print, "Q(%s)\n", *q); } } QSTR_EXIT(); |