diff options
author | Damien George <damien.p.george@gmail.com> | 2017-02-24 13:43:43 +1100 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2017-02-24 13:43:43 +1100 |
commit | 5255255fb9ea003db65935fe6cf2ac9d17410faa (patch) | |
tree | 6128e37b20f9440f387f5d3d36c9110dd19950a7 /py/parse.c | |
parent | f62503dc4757902a1f68c55b938bcd6ddfd6e002 (diff) |
py: Create str/bytes objects in the parser, not the compiler.
Previous to this patch any non-interned str/bytes objects would create a
special parse node that held a copy of the str/bytes data. Then in the
compiler this data would be turned into a str/bytes object. This actually
lead to 2 copies of the data, one in the parse node and one in the object.
The parse node's copy of the data would be freed at the end of the compile
stage but nevertheless it meant that the peak memory usage of the
parse/compile stage was higher than it needed to be (by an amount equal to
the number of bytes in all the non-interned str/bytes objects).
This patch changes the behaviour so that str/bytes objects are created
directly in the parser and the object stored in a const-object parse node
(which already exists for bignum, float and complex const objects). This
reduces peak RAM usage of the parse/compile stage, simplifies the parser
and compiler, and reduces code size by about 170 bytes on Thumb2 archs,
and by about 300 bytes on Xtensa archs.
Diffstat (limited to 'py/parse.c')
-rw-r--r-- | py/parse.c | 47 |
1 files changed, 13 insertions, 34 deletions
diff --git a/py/parse.c b/py/parse.c index 7280f7487..5a5adc609 100644 --- a/py/parse.c +++ b/py/parse.c @@ -38,6 +38,7 @@ #include "py/runtime0.h" #include "py/runtime.h" #include "py/objint.h" +#include "py/objstr.h" #include "py/builtin.h" #if MICROPY_ENABLE_COMPILER @@ -75,8 +76,6 @@ enum { #include "py/grammar.h" #undef DEF_RULE #undef DEF_RULE_NC - RULE_string, // special node for non-interned string - RULE_bytes, // special node for non-interned bytes RULE_const_object, // special node for a constant, generic Python object // define rules without a compile function @@ -123,8 +122,6 @@ STATIC const rule_t *const rules[] = { #include "py/grammar.h" #undef DEF_RULE #undef DEF_RULE_NC - NULL, // RULE_string - NULL, // RULE_bytes NULL, // RULE_const_object // define rules without a compile function @@ -326,11 +323,7 @@ void mp_parse_node_print(mp_parse_node_t pn, size_t indent) { } else { // node must be a mp_parse_node_struct_t mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; - if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) { - printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]); - } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) { - printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]); - } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) { + if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) { #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32)); #else @@ -392,21 +385,6 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) { parser->result_stack[parser->result_stack_top++] = pn; } -STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) { - mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2); - if (pn == NULL) { - parser->parse_error = PARSE_ERROR_MEMORY; - return MP_PARSE_NODE_NULL; - } - pn->source_line = src_line; - pn->kind_num_nodes = rule_kind | (2 << 8); - char *p = m_new(char, len); - memcpy(p, str, len); - pn->nodes[0] = (uintptr_t)p; - pn->nodes[1] = len; - return (mp_parse_node_t)pn; -} - STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) { mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t)); if (pn == NULL) { @@ -473,8 +451,11 @@ STATIC void push_result_token(parser_t *parser, const rule_t *rule) { // qstr exists, make a leaf node pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst); } else { - // not interned, make a node holding a pointer to the string/bytes data - pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len); + // not interned, make a node holding a pointer to the string/bytes object + mp_obj_t o = mp_obj_new_str_of_type( + lex->tok_kind == MP_TOKEN_STRING ? &mp_type_str : &mp_type_bytes, + (const byte*)lex->vstr.buf, lex->vstr.len); + pn = make_node_const_object(parser, lex->tok_line, o); } } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind); @@ -934,15 +915,13 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { // this code discards lonely statements, such as doc strings if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) { mp_parse_node_t p = peek_result(&parser, 1); - if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) { + if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) + || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) { pop_result(&parser); // MP_PARSE_NODE_NULL - mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string - if (MP_PARSE_NODE_IS_STRUCT(pn)) { - mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; - if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) { - m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]); - } - } + pop_result(&parser); // const expression (leaf or RULE_const_object) + // Pushing the "pass" rule here will overwrite any RULE_const_object + // entry that was on the result stack, allowing the GC to reclaim + // the memory from the const object when needed. push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0); break; } |