aboutsummaryrefslogtreecommitdiff
path: root/py/persistentcode.c
diff options
context:
space:
mode:
authorDamien George <damien.p.george@gmail.com>2019-09-16 22:12:59 +1000
committerDamien George <damien.p.george@gmail.com>2019-10-01 12:26:22 +1000
commitb5ebfadbd615de42c43851f27a062bacd9147996 (patch)
treee4602e96a0eaf9ee0c30913dbabfe9013dda617a /py/persistentcode.c
parent81d04a0200e0d4038c011e4946bfae5707ef9d9c (diff)
py: Compress first part of bytecode prelude.
The start of the bytecode prelude contains 6 numbers telling the amount of stack needed for the Python values and exceptions, and the signature of the function. Prior to this patch these numbers were all encoded one after the other (2x variable unsigned integers, then 4x bytes), but using so many bytes is unnecessary. An entropy analysis of around 150,000 bytecode functions from the CPython standard library showed that the optimal Shannon coding would need about 7.1 bits on average to encode these 6 numbers, compared to the existing 48 bits. This patch attempts to get close to this optimal value by packing the 6 numbers into a single, variable-length unsigned integer via bit-wise interleaving. The interleaving scheme is chosen to minimise the average number of bytes needed, and at the same time keep the scheme simple enough so it can be implemented without too much overhead in code size or speed. The scheme requires about 10.5 bits on average to store the 6 numbers. As a result most functions which originally took 6 bytes to encode these 6 numbers now need only 1 byte (in 80% of cases).
Diffstat (limited to 'py/persistentcode.c')
-rw-r--r--py/persistentcode.c41
1 files changed, 19 insertions, 22 deletions
diff --git a/py/persistentcode.c b/py/persistentcode.c
index 3b59746ba..9776acb1e 100644
--- a/py/persistentcode.c
+++ b/py/persistentcode.c
@@ -157,17 +157,16 @@ typedef struct _bytecode_prelude_t {
uint code_info_size;
} bytecode_prelude_t;
-#if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_EMIT_MACHINE_CODE
-
// ip will point to start of opcodes
// ip2 will point to simple_name, source_file qstrs
STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_t *prelude) {
- prelude->n_state = mp_decode_uint(ip);
- prelude->n_exc_stack = mp_decode_uint(ip);
- prelude->scope_flags = *(*ip)++;
- prelude->n_pos_args = *(*ip)++;
- prelude->n_kwonly_args = *(*ip)++;
- prelude->n_def_pos_args = *(*ip)++;
+ MP_BC_PRELUDE_SIG_DECODE(*ip);
+ prelude->n_state = n_state;
+ prelude->n_exc_stack = n_exc_stack;
+ prelude->scope_flags = scope_flags;
+ prelude->n_pos_args = n_pos_args;
+ prelude->n_kwonly_args = n_kwonly_args;
+ prelude->n_def_pos_args = n_def_pos_args;
*ip2 = *ip;
prelude->code_info_size = mp_decode_uint(ip2);
*ip += prelude->code_info_size;
@@ -175,8 +174,6 @@ STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_
}
}
-#endif
-
#endif // MICROPY_PERSISTENT_CODE_LOAD || MICROPY_PERSISTENT_CODE_SAVE
#if MICROPY_PERSISTENT_CODE_LOAD
@@ -285,19 +282,19 @@ STATIC mp_obj_t load_obj(mp_reader_t *reader) {
}
STATIC void load_prelude(mp_reader_t *reader, byte **ip, byte **ip2, bytecode_prelude_t *prelude) {
- prelude->n_state = read_uint(reader, ip);
- prelude->n_exc_stack = read_uint(reader, ip);
- read_bytes(reader, *ip, 4);
- prelude->scope_flags = *(*ip)++;
- prelude->n_pos_args = *(*ip)++;
- prelude->n_kwonly_args = *(*ip)++;
- prelude->n_def_pos_args = *(*ip)++;
- *ip2 = *ip;
- prelude->code_info_size = read_uint(reader, ip2);
- read_bytes(reader, *ip2, prelude->code_info_size - (*ip2 - *ip));
- *ip += prelude->code_info_size;
- while ((*(*ip)++ = read_byte(reader)) != 255) {
+ // Read in the prelude
+ byte *ip_read = *ip;
+ read_uint(reader, &ip_read); // read in n_state/etc (is effectively a var-uint)
+ byte *ip_read_save = ip_read;
+ size_t code_info_size = read_uint(reader, &ip_read); // read in code_info_size
+ code_info_size -= ip_read - ip_read_save; // subtract bytes taken by code_info_size itself
+ read_bytes(reader, ip_read, code_info_size); // read remaining code info
+ ip_read += code_info_size;
+ while ((*ip_read++ = read_byte(reader)) != 255) {
}
+
+ // Entire prelude has been read into *ip, now decode and extract values from it
+ extract_prelude((const byte**)ip, (const byte**)ip2, prelude);
}
STATIC void load_bytecode(mp_reader_t *reader, qstr_window_t *qw, byte *ip, byte *ip_top) {