diff options
author | Damien George <damien.p.george@gmail.com> | 2017-11-16 13:53:04 +1100 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2017-11-16 13:53:04 +1100 |
commit | 1f1d5194d775ad996f1d341c1a44b56af7ea4d4c (patch) | |
tree | 1ec3138b14dfbd7a759bd065991b346a112fe1cb /py/objstr.c | |
parent | 4601759bf59e16b860a3f082e9aa4ea78356bf92 (diff) |
py/objstr: Make mp_obj_new_str_of_type check for existing interned qstr.
The function mp_obj_new_str_of_type is a general str object constructor
used in many places in the code to create either a str or bytes object.
When creating a str it should first check if the string data already exists
as an interned qstr, and if so then return the qstr object. This patch
makes the function have such behaviour, which helps to reduce heap usage by
reusing existing interned data where possible.
The old behaviour of mp_obj_new_str_of_type (which didn't check for
existing interned data) is made available through the function
mp_obj_new_str_copy, but should only be used in very special cases.
One consequence of this patch is that the following expression is now True:
'abc' is ' abc '.split()[0]
Diffstat (limited to 'py/objstr.c')
-rw-r--r-- | py/objstr.c | 26 |
1 files changed, 19 insertions, 7 deletions
diff --git a/py/objstr.c b/py/objstr.c index 5c464ba7d..bca2af801 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -164,7 +164,7 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_ mp_raise_msg(&mp_type_UnicodeError, NULL); } #endif - mp_obj_str_t *o = MP_OBJ_TO_PTR(mp_obj_new_str_of_type(type, NULL, str_len)); + mp_obj_str_t *o = MP_OBJ_TO_PTR(mp_obj_new_str_copy(type, NULL, str_len)); o->data = str_data; o->hash = str_hash; return MP_OBJ_FROM_PTR(o); @@ -205,7 +205,7 @@ STATIC mp_obj_t bytes_make_new(const mp_obj_type_t *type_in, size_t n_args, size if (str_hash == 0) { str_hash = qstr_compute_hash(str_data, str_len); } - mp_obj_str_t *o = MP_OBJ_TO_PTR(mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len)); + mp_obj_str_t *o = MP_OBJ_TO_PTR(mp_obj_new_str_copy(&mp_type_bytes, NULL, str_len)); o->data = str_data; o->hash = str_hash; return MP_OBJ_FROM_PTR(o); @@ -226,7 +226,7 @@ STATIC mp_obj_t bytes_make_new(const mp_obj_type_t *type_in, size_t n_args, size // check if argument has the buffer protocol mp_buffer_info_t bufinfo; if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) { - return mp_obj_new_str_of_type(&mp_type_bytes, bufinfo.buf, bufinfo.len); + return mp_obj_new_bytes(bufinfo.buf, bufinfo.len); } vstr_t vstr; @@ -1977,8 +1977,9 @@ const mp_obj_type_t mp_type_bytes = { const mp_obj_str_t mp_const_empty_bytes_obj = {{&mp_type_bytes}, 0, 0, (const byte*)""}; // Create a str/bytes object using the given data. New memory is allocated and -// the data is copied across. -mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, size_t len) { +// the data is copied across. This function should only be used if the type is bytes, +// or if the type is str and the string data is known to be not interned. +mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte* data, size_t len) { mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; o->len = len; @@ -1992,6 +1993,17 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, siz return MP_OBJ_FROM_PTR(o); } +// Create a str/bytes object using the given data. If the type is str and the string +// data is already interned, then a qstr object is returned. Otherwise new memory is +// allocated for the object and the data is copied across. +mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, size_t len) { + if (type == &mp_type_str) { + return mp_obj_new_str((const char*)data, len); + } else { + return mp_obj_new_bytes(data, len); + } +} + // Create a str using a qstr to store the data; may use existing or new qstr. mp_obj_t mp_obj_new_str_via_qstr(const char* data, size_t len) { return MP_OBJ_NEW_QSTR(qstr_from_strn(data, len)); @@ -2034,7 +2046,7 @@ mp_obj_t mp_obj_new_str(const char* data, size_t len) { return MP_OBJ_NEW_QSTR(q); } else { // no existing qstr, don't make one - return mp_obj_new_str_of_type(&mp_type_str, (const byte*)data, len); + return mp_obj_new_str_copy(&mp_type_str, (const byte*)data, len); } } @@ -2044,7 +2056,7 @@ mp_obj_t mp_obj_str_intern(mp_obj_t str) { } mp_obj_t mp_obj_new_bytes(const byte* data, size_t len) { - return mp_obj_new_str_of_type(&mp_type_bytes, data, len); + return mp_obj_new_str_copy(&mp_type_bytes, data, len); } bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { |