aboutsummaryrefslogtreecommitdiff
path: root/py/bc0.h
diff options
context:
space:
mode:
authorDamien George <damien@micropython.org>2022-03-16 09:37:58 +1100
committerDamien George <damien@micropython.org>2022-03-28 15:41:38 +1100
commit538c3c0a5540b4018cedd442b585666130fc8def (patch)
treeddf773175e96e85650b3e77a9400ca3e0ba6ab40 /py/bc0.h
parent9e3e67b1d8bf0fd5ee612b3c828ae549f76d1407 (diff)
py: Change jump opcodes to emit 1-byte jump offset when possible.
This commit introduces changes: - All jump opcodes are changed to have variable length arguments, of either 1 or 2 bytes (previously they were fixed at 2 bytes). In most cases only 1 byte is needed to encode the short jump offset, saving bytecode size. - The bytecode emitter now selects 1 byte jump arguments when the jump offset is guaranteed to fit in 1 byte. This is achieved by checking if the code size changed during the last pass and, if it did (if it shrank), then requesting that the compiler make another pass to get the correct offsets of the now-smaller code. This can continue multiple times until the code stabilises. The code can only ever shrink so this iteration is guaranteed to complete. In most cases no extra passes are needed, the original 4 passes are enough to get it right by the 4th pass (because the 2nd pass computes roughly the correct labels and the 3rd pass computes the correct size for the jump argument). This change to the jump opcode encoding reduces .mpy files and RAM usage (when bytecode is in RAM) by about 2% on average. The performance of the VM is not impacted, at least within measurment of the performance benchmark suite. Code size is reduced for builds that include a decent amount of frozen bytecode. ARM Cortex-M builds without any frozen code increase by about 350 bytes. Signed-off-by: Damien George <damien@micropython.org>
Diffstat (limited to 'py/bc0.h')
-rw-r--r--py/bc0.h34
1 files changed, 23 insertions, 11 deletions
diff --git a/py/bc0.h b/py/bc0.h
index 842034ebf..50c4954b0 100644
--- a/py/bc0.h
+++ b/py/bc0.h
@@ -28,6 +28,18 @@
// MicroPython bytecode opcodes, grouped based on the format of the opcode
+// All opcodes are encoded as a byte with an optional argument. Arguments are
+// variable-length encoded so they can be as small as possible. The possible
+// encodings for arguments are (ip[0] is the opcode):
+//
+// - unsigned relative bytecode offset:
+// - if ip[1] high bit is clear then: arg = ip[1]
+// - if ip[1] high bit is set then: arg = ip[1] & 0x7f | ip[2] << 7
+//
+// - signed relative bytecode offset:
+// - if ip[1] high bit is clear then: arg = ip[1] - 0x40
+// - if ip[1] high bit is set then: arg = (ip[1] & 0x7f | ip[2] << 7) - 0x4000
+
#define MP_BC_MASK_FORMAT (0xf0)
#define MP_BC_MASK_EXTRA_BYTE (0x9e)
@@ -101,17 +113,17 @@
#define MP_BC_ROT_TWO (MP_BC_BASE_BYTE_O + 0x0a)
#define MP_BC_ROT_THREE (MP_BC_BASE_BYTE_O + 0x0b)
-#define MP_BC_JUMP (MP_BC_BASE_JUMP_E + 0x02) // rel byte code offset, 16-bit signed, in excess
-#define MP_BC_POP_JUMP_IF_TRUE (MP_BC_BASE_JUMP_E + 0x03) // rel byte code offset, 16-bit signed, in excess
-#define MP_BC_POP_JUMP_IF_FALSE (MP_BC_BASE_JUMP_E + 0x04) // rel byte code offset, 16-bit signed, in excess
-#define MP_BC_JUMP_IF_TRUE_OR_POP (MP_BC_BASE_JUMP_E + 0x05) // rel byte code offset, 16-bit signed, in excess
-#define MP_BC_JUMP_IF_FALSE_OR_POP (MP_BC_BASE_JUMP_E + 0x06) // rel byte code offset, 16-bit signed, in excess
-#define MP_BC_UNWIND_JUMP (MP_BC_BASE_JUMP_E + 0x00) // rel byte code offset, 16-bit signed, in excess; then a byte
-#define MP_BC_SETUP_WITH (MP_BC_BASE_JUMP_E + 0x07) // rel byte code offset, 16-bit unsigned
-#define MP_BC_SETUP_EXCEPT (MP_BC_BASE_JUMP_E + 0x08) // rel byte code offset, 16-bit unsigned
-#define MP_BC_SETUP_FINALLY (MP_BC_BASE_JUMP_E + 0x09) // rel byte code offset, 16-bit unsigned
-#define MP_BC_POP_EXCEPT_JUMP (MP_BC_BASE_JUMP_E + 0x0a) // rel byte code offset, 16-bit unsigned
-#define MP_BC_FOR_ITER (MP_BC_BASE_JUMP_E + 0x0b) // rel byte code offset, 16-bit unsigned
+#define MP_BC_UNWIND_JUMP (MP_BC_BASE_JUMP_E + 0x00) // signed relative bytecode offset; then a byte
+#define MP_BC_JUMP (MP_BC_BASE_JUMP_E + 0x02) // signed relative bytecode offset
+#define MP_BC_POP_JUMP_IF_TRUE (MP_BC_BASE_JUMP_E + 0x03) // signed relative bytecode offset
+#define MP_BC_POP_JUMP_IF_FALSE (MP_BC_BASE_JUMP_E + 0x04) // signed relative bytecode offset
+#define MP_BC_JUMP_IF_TRUE_OR_POP (MP_BC_BASE_JUMP_E + 0x05) // signed relative bytecode offset
+#define MP_BC_JUMP_IF_FALSE_OR_POP (MP_BC_BASE_JUMP_E + 0x06) // signed relative bytecode offset
+#define MP_BC_SETUP_WITH (MP_BC_BASE_JUMP_E + 0x07) // unsigned relative bytecode offset
+#define MP_BC_SETUP_EXCEPT (MP_BC_BASE_JUMP_E + 0x08) // unsigned relative bytecode offset
+#define MP_BC_SETUP_FINALLY (MP_BC_BASE_JUMP_E + 0x09) // unsigned relative bytecode offset
+#define MP_BC_POP_EXCEPT_JUMP (MP_BC_BASE_JUMP_E + 0x0a) // unsigned relative bytecode offset
+#define MP_BC_FOR_ITER (MP_BC_BASE_JUMP_E + 0x0b) // unsigned relative bytecode offset
#define MP_BC_WITH_CLEANUP (MP_BC_BASE_BYTE_O + 0x0c)
#define MP_BC_END_FINALLY (MP_BC_BASE_BYTE_O + 0x0d)
#define MP_BC_GET_ITER (MP_BC_BASE_BYTE_O + 0x0e)