author     Jerome Forissier <jerome@forissier.org>    2020-06-04 16:44:57 +0200
committer  Jérôme Forissier <jerome@forissier.org>    2020-08-14 12:19:21 +0200
commit     e2f03e0736a6774eb8fb07ce8dc55d8ed186b67b (patch)
tree       75e489ec751f457e76a2254118db663b2e10429e /core
parent     0733f3d1de1a076c8fd07727464cc1f27e909830 (diff)
core: add stack overflow detection
This commit introduces CFG_CORE_DEBUG_CHECK_STACKS to check the stack
limits using compiler instrumentation (-finstrument-functions). When
enabled, the C compiler will insert entry and exit hooks in all
functions in the TEE core. On entry, the stack pointer is checked and,
if an overflow is detected, panic() is called.

How is this helpful, since we have stack canaries already?

1. When a dead canary is found, the call stack will give no indication
   of the root cause of the corruption, which may have happened quite
   some time before. Running the test case again with a debugger
   attached and a watchpoint on the canary is not always an option.

2. The system may corrupt the stack and hang in an exception handler
   before the first canary check, for instance, during boot when the
   temporary stack is used. This code will likely catch such issues,
   too.

The downside is increased stack usage and a significant runtime
overhead, which is why this feature should be enabled only for
troubleshooting.

Signed-off-by: Jerome Forissier <jerome@forissier.org>
Tested-by: Jerome Forissier <jerome@forissier.org> (QEMU, QEMUv8)
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Acked-by: Etienne Carriere <etienne.carriere@linaro.org>
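For context, -finstrument-functions makes the compiler emit a call to
__cyg_profile_func_enter() at each function entry and to
__cyg_profile_func_exit() at each exit; the hook names and signatures are
fixed by GCC. A minimal sketch of the effect (the commentary is
illustrative, not the exact code from this patch):

void __cyg_profile_func_enter(void *this_fn, void *call_site);
void __cyg_profile_func_exit(void *this_fn, void *call_site);

int foo(int x)
{
	/*
	 * Built with -finstrument-functions, this behaves as if written:
	 *
	 *	__cyg_profile_func_enter(foo, <return address>);
	 *	...original body...
	 *	__cyg_profile_func_exit(foo, <return address>);
	 *
	 * so the enter hook can validate the stack pointer on every
	 * call, close to where an overflow would actually occur.
	 */
	return x + 1;
}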
Diffstat (limited to 'core')
-rw-r--r--  core/arch/arm/include/kernel/spinlock.h      4
-rw-r--r--  core/arch/arm/include/kernel/thread.h       31
-rw-r--r--  core/arch/arm/kernel/spin_lock_debug.c       3
-rw-r--r--  core/arch/arm/kernel/thread.c              209
-rw-r--r--  core/arch/arm/kernel/unwind_arm32.c          2
-rw-r--r--  core/arch/arm/kernel/unwind_arm64.c          2
-rw-r--r--  core/core.mk                                 7
-rw-r--r--  core/kernel/assert.c                         8
8 files changed, 208 insertions, 58 deletions
diff --git a/core/arch/arm/include/kernel/spinlock.h b/core/arch/arm/include/kernel/spinlock.h
index b2206223..6097df78 100644
--- a/core/arch/arm/include/kernel/spinlock.h
+++ b/core/arch/arm/include/kernel/spinlock.h
@@ -20,14 +20,14 @@
void spinlock_count_incr(void);
void spinlock_count_decr(void);
bool have_spinlock(void);
-static inline void assert_have_no_spinlock(void)
+static inline void __nostackcheck assert_have_no_spinlock(void)
{
assert(!have_spinlock());
}
#else
static inline void spinlock_count_incr(void) { }
static inline void spinlock_count_decr(void) { }
-static inline void assert_have_no_spinlock(void) { }
+static inline void __nostackcheck assert_have_no_spinlock(void) { }
#endif
void __cpu_spin_lock(unsigned int *lock);
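The __nostackcheck annotation added above, and used throughout the rest of
this patch, is not defined in any of the hunks shown. Presumably it maps,
in <compiler.h>, to the GCC attribute that exempts a function from
-finstrument-functions, so the instrumentation hooks and the low-level
functions they rely on cannot recurse into the stack check (an assumed
definition, not part of this diff):

#define __nostackcheck __attribute__((no_instrument_function))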
diff --git a/core/arch/arm/include/kernel/thread.h b/core/arch/arm/include/kernel/thread.h
index 202aee35..9ecc67ca 100644
--- a/core/arch/arm/include/kernel/thread.h
+++ b/core/arch/arm/include/kernel/thread.h
@@ -48,6 +48,9 @@ struct thread_core_local {
#ifdef CFG_TEE_CORE_DEBUG
unsigned int locked_count; /* Number of spinlocks held */
#endif
+#ifdef CFG_CORE_DEBUG_CHECK_STACKS
+ bool stackcheck_recursion;
+#endif
} THREAD_CORE_LOCAL_ALIGNED;
struct thread_vector_table {
@@ -241,7 +244,9 @@ struct thread_specific_data {
vaddr_t abort_va;
unsigned int abort_core;
struct thread_abort_regs abort_regs;
-
+#ifdef CFG_CORE_DEBUG_CHECK_STACKS
+ bool stackcheck_recursion;
+#endif
};
#ifdef CFG_WITH_ARM_TRUSTED_FW
@@ -367,7 +372,7 @@ uint32_t thread_mask_exceptions(uint32_t exceptions);
void thread_unmask_exceptions(uint32_t state);
-static inline bool thread_foreign_intr_disabled(void)
+static inline bool __nostackcheck thread_foreign_intr_disabled(void)
{
return !!(thread_get_exceptions() & THREAD_EXCP_FOREIGN_INTR);
}
@@ -558,10 +563,26 @@ vaddr_t thread_stack_start(void);
size_t thread_stack_size(void);
/*
- * Returns the start and end addresses of the current stack (thread, temporary
- * or abort stack).
+ * Returns the start (top, lowest address) and end (bottom, highest address) of
+ * the current stack (thread, temporary or abort stack).
+ * When CFG_CORE_DEBUG_CHECK_STACKS=y, the @hard parameter tells if the hard or
+ * soft limits are queried. The difference between soft and hard is that for the
+ * latter, the stack start includes some additional space to let any function
+ * overflow the soft limit and still be able to print a stack dump in this case.
*/
-void get_stack_limits(vaddr_t *start, vaddr_t *end);
+bool get_stack_limits(vaddr_t *start, vaddr_t *end, bool hard);
+
+static inline bool __nostackcheck get_stack_soft_limits(vaddr_t *start,
+ vaddr_t *end)
+{
+ return get_stack_limits(start, end, false);
+}
+
+static inline bool __nostackcheck get_stack_hard_limits(vaddr_t *start,
+ vaddr_t *end)
+{
+ return get_stack_limits(start, end, true);
+}
bool thread_is_in_normal_mode(void);
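As a usage sketch, a hypothetical helper (not part of the patch) can
combine the new soft limits with the address of a local variable, which
closely approximates the stack pointer; the same trick is used by
check_stack_limits() in thread.c below:

static void dump_stack_headroom(void)
{
	vaddr_t start = 0;
	vaddr_t end = 0;
	/* Any address in the current frame approximates the SP. */
	vaddr_t sp = (vaddr_t)&start;

	/* Stacks grow downwards, so headroom is the gap down to start. */
	if (get_stack_soft_limits(&start, &end))
		DMSG("%zu bytes left before the soft limit",
		     (size_t)(sp - start));
}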
diff --git a/core/arch/arm/kernel/spin_lock_debug.c b/core/arch/arm/kernel/spin_lock_debug.c
index f76729a9..4ebe8651 100644
--- a/core/arch/arm/kernel/spin_lock_debug.c
+++ b/core/arch/arm/kernel/spin_lock_debug.c
@@ -4,6 +4,7 @@
*/
#include <assert.h>
+#include <compiler.h>
#include <kernel/spinlock.h>
#include "thread_private.h"
@@ -23,7 +24,7 @@ void spinlock_count_decr(void)
l->locked_count--;
}
-bool have_spinlock(void)
+bool __nostackcheck have_spinlock(void)
{
struct thread_core_local *l;
diff --git a/core/arch/arm/kernel/thread.c b/core/arch/arm/kernel/thread.c
index 3a9030a5..9069e927 100644
--- a/core/arch/arm/kernel/thread.c
+++ b/core/arch/arm/kernel/thread.c
@@ -8,6 +8,7 @@
#include <arm.h>
#include <assert.h>
+#include <config.h>
#include <io.h>
#include <keep.h>
#include <kernel/asan.h>
@@ -33,13 +34,26 @@
#include "thread_private.h"
+struct thread_ctx threads[CFG_NUM_THREADS];
+
+struct thread_core_local thread_core_local[CFG_TEE_CORE_NB_CORE] __nex_bss;
+
+/*
+ * Stacks
+ *
+ * [Lower addresses on the left]
+ *
+ * [ STACK_CANARY_SIZE/2 | STACK_CHECK_EXTRA | STACK_XXX_SIZE | STACK_CANARY_SIZE/2 ]
+ * ^                     ^                   ^                ^
+ * stack_xxx[n]          "hard" top          "soft" top       bottom
+ */
+
#ifdef CFG_WITH_ARM_TRUSTED_FW
#define STACK_TMP_OFFS 0
#else
#define STACK_TMP_OFFS SM_STACK_TMP_RESERVE_SIZE
#endif
-
#ifdef ARM32
#ifdef CFG_CORE_SANITIZE_KADDRESS
#define STACK_TMP_SIZE (3072 + STACK_TMP_OFFS)
@@ -71,10 +85,6 @@
#endif
#endif /*ARM64*/
-struct thread_ctx threads[CFG_NUM_THREADS];
-
-struct thread_core_local thread_core_local[CFG_TEE_CORE_NB_CORE] __nex_bss;
-
#ifdef CFG_WITH_STACK_CANARIES
#ifdef ARM32
#define STACK_CANARY_SIZE (4 * sizeof(uint32_t))
@@ -91,17 +101,27 @@ struct thread_core_local thread_core_local[CFG_TEE_CORE_NB_CORE] __nex_bss;
#define STACK_CANARY_SIZE 0
#endif
+#ifdef CFG_CORE_DEBUG_CHECK_STACKS
+/*
+ * Extra space added to each stack in order to reliably detect and dump stack
+ * overflows. Should cover the maximum expected overflow size caused by any C
+ * function (say, 512 bytes; no function should have that many local variables),
+ * plus the maximum stack space needed by __cyg_profile_func_enter(): about 1 KB,
+ * a large part of which is used to print the call stack. Total: 1.5 KB.
+ */
+#define STACK_CHECK_EXTRA 1536
+#else
+#define STACK_CHECK_EXTRA 0
+#endif
+
#define DECLARE_STACK(name, num_stacks, stack_size, linkage) \
linkage uint32_t name[num_stacks] \
- [ROUNDUP(stack_size + STACK_CANARY_SIZE, STACK_ALIGNMENT) / \
- sizeof(uint32_t)] \
+ [ROUNDUP(stack_size + STACK_CANARY_SIZE + STACK_CHECK_EXTRA, \
+ STACK_ALIGNMENT) / sizeof(uint32_t)] \
__attribute__((section(".nozi_stack." # name), \
aligned(STACK_ALIGNMENT)))
-#define STACK_SIZE(stack) (sizeof(stack) - STACK_CANARY_SIZE / 2)
-
-#define GET_STACK(stack) \
- ((vaddr_t)(stack) + STACK_SIZE(stack))
+#define GET_STACK(stack) ((vaddr_t)(stack) + STACK_SIZE(stack))
DECLARE_STACK(stack_tmp, CFG_TEE_CORE_NB_CORE, STACK_TMP_SIZE, static);
DECLARE_STACK(stack_abt, CFG_TEE_CORE_NB_CORE, STACK_ABT_SIZE, static);
@@ -109,9 +129,15 @@ DECLARE_STACK(stack_abt, CFG_TEE_CORE_NB_CORE, STACK_ABT_SIZE, static);
DECLARE_STACK(stack_thread, CFG_NUM_THREADS, STACK_THREAD_SIZE, static);
#endif
+#define GET_STACK_TOP_HARD(stack, n) \
+ ((vaddr_t)&(stack)[n] + STACK_CANARY_SIZE / 2)
+#define GET_STACK_TOP_SOFT(stack, n) \
+ (GET_STACK_TOP_HARD(stack, n) + STACK_CHECK_EXTRA)
+#define GET_STACK_BOTTOM(stack, n) ((vaddr_t)&(stack)[n] + sizeof(stack[n]) - \
+ STACK_CANARY_SIZE / 2)
+
const void *stack_tmp_export __section(".identity_map.stack_tmp_export") =
- (uint8_t *)stack_tmp + sizeof(stack_tmp[0]) -
- (STACK_TMP_OFFS + STACK_CANARY_SIZE / 2);
+ (void *)(GET_STACK_BOTTOM(stack_tmp, 0) - STACK_TMP_OFFS);
const uint32_t stack_tmp_stride __section(".identity_map.stack_tmp_stride") =
sizeof(stack_tmp[0]);
@@ -214,14 +240,14 @@ void thread_unlock_global(void)
}
#ifdef ARM32
-uint32_t thread_get_exceptions(void)
+uint32_t __nostackcheck thread_get_exceptions(void)
{
uint32_t cpsr = read_cpsr();
return (cpsr >> CPSR_F_SHIFT) & THREAD_EXCP_ALL;
}
-void thread_set_exceptions(uint32_t exceptions)
+void __nostackcheck thread_set_exceptions(uint32_t exceptions)
{
uint32_t cpsr = read_cpsr();
@@ -236,14 +262,14 @@ void thread_set_exceptions(uint32_t exceptions)
#endif /*ARM32*/
#ifdef ARM64
-uint32_t thread_get_exceptions(void)
+uint32_t __nostackcheck thread_get_exceptions(void)
{
uint32_t daif = read_daif();
return (daif >> DAIF_F_SHIFT) & THREAD_EXCP_ALL;
}
-void thread_set_exceptions(uint32_t exceptions)
+void __nostackcheck thread_set_exceptions(uint32_t exceptions)
{
uint32_t daif = read_daif();
@@ -257,7 +283,7 @@ void thread_set_exceptions(uint32_t exceptions)
}
#endif /*ARM64*/
-uint32_t thread_mask_exceptions(uint32_t exceptions)
+uint32_t __nostackcheck thread_mask_exceptions(uint32_t exceptions)
{
uint32_t state = thread_get_exceptions();
@@ -265,13 +291,14 @@ uint32_t thread_mask_exceptions(uint32_t exceptions)
return state;
}
-void thread_unmask_exceptions(uint32_t state)
+void __nostackcheck thread_unmask_exceptions(uint32_t state)
{
thread_set_exceptions(state & THREAD_EXCP_ALL);
}
-static struct thread_core_local *get_core_local(unsigned int pos)
+static struct thread_core_local * __nostackcheck
+get_core_local(unsigned int pos)
{
/*
* Foreign interrupts must be disabled before playing with core_local
@@ -284,13 +311,95 @@ static struct thread_core_local *get_core_local(unsigned int pos)
return &thread_core_local[pos];
}
-struct thread_core_local *thread_get_core_local(void)
+struct thread_core_local * __nostackcheck thread_get_core_local(void)
{
unsigned int pos = get_core_pos();
return get_core_local(pos);
}
+#ifdef CFG_CORE_DEBUG_CHECK_STACKS
+static void print_stack_limits(void)
+{
+ size_t n = 0;
+ vaddr_t __maybe_unused start = 0;
+ vaddr_t __maybe_unused end = 0;
+
+ for (n = 0; n < CFG_TEE_CORE_NB_CORE; n++) {
+ start = GET_STACK_TOP_SOFT(stack_tmp, n);
+ end = GET_STACK_BOTTOM(stack_tmp, n);
+ DMSG("tmp [%zu] 0x%" PRIxVA "..0x%" PRIxVA, n, start, end);
+ }
+ for (n = 0; n < CFG_TEE_CORE_NB_CORE; n++) {
+ start = GET_STACK_TOP_SOFT(stack_abt, n);
+ end = GET_STACK_BOTTOM(stack_abt, n);
+ DMSG("abt [%zu] 0x%" PRIxVA "..0x%" PRIxVA, n, start, end);
+ }
+ for (n = 0; n < CFG_NUM_THREADS; n++) {
+ end = threads[n].stack_va_end;
+ start = end - STACK_THREAD_SIZE;
+ DMSG("thr [%zu] 0x%" PRIxVA "..0x%" PRIxVA, n, start, end);
+ }
+}
+
+static void check_stack_limits(void)
+{
+ vaddr_t stack_start = 0;
+ vaddr_t stack_end = 0;
+ /* Any value in the current stack frame will do */
+ vaddr_t current_sp = (vaddr_t)&stack_start;
+
+ if (!get_stack_soft_limits(&stack_start, &stack_end))
+ panic("Unknown stack limits");
+ if (current_sp < stack_start || current_sp > stack_end) {
+ DMSG("Stack pointer out of range (0x%" PRIxVA ")", current_sp);
+ print_stack_limits();
+ panic();
+ }
+}
+
+static bool * __nostackcheck get_stackcheck_recursion_flag(void)
+{
+ uint32_t exceptions = thread_mask_exceptions(THREAD_EXCP_FOREIGN_INTR);
+ unsigned int pos = get_core_pos();
+ struct thread_core_local *l = get_core_local(pos);
+ int ct = l->curr_thread;
+ bool *p = NULL;
+
+ if (l->flags & (THREAD_CLF_ABORT | THREAD_CLF_TMP))
+ p = &l->stackcheck_recursion;
+ else if (!l->flags)
+ p = &threads[ct].tsd.stackcheck_recursion;
+
+ thread_unmask_exceptions(exceptions);
+ return p;
+}
+
+void __cyg_profile_func_enter(void *this_fn, void *call_site);
+void __nostackcheck __cyg_profile_func_enter(void *this_fn __unused,
+ void *call_site __unused)
+{
+ bool *p = get_stackcheck_recursion_flag();
+
+ assert(p);
+ if (*p)
+ return;
+ *p = true;
+ check_stack_limits();
+ *p = false;
+}
+
+void __cyg_profile_func_exit(void *this_fn, void *call_site);
+void __nostackcheck __cyg_profile_func_exit(void *this_fn __unused,
+ void *call_site __unused)
+{
+}
+#else
+static void print_stack_limits(void)
+{
+}
+#endif
+
static void thread_lazy_save_ns_vfp(void)
{
#ifdef CFG_WITH_VFP
@@ -396,7 +505,7 @@ void thread_init_boot_thread(void)
threads[0].state = THREAD_STATE_ACTIVE;
}
-void thread_clr_boot_thread(void)
+void __nostackcheck thread_clr_boot_thread(void)
{
struct thread_core_local *l = thread_get_core_local();
@@ -580,7 +689,7 @@ void thread_resume_from_rpc(uint32_t thread_id, uint32_t a0, uint32_t a1,
panic();
}
-void *thread_get_tmp_sp(void)
+void __nostackcheck *thread_get_tmp_sp(void)
{
struct thread_core_local *l = thread_get_core_local();
@@ -621,32 +730,41 @@ size_t thread_stack_size(void)
return STACK_THREAD_SIZE;
}
-void get_stack_limits(vaddr_t *start, vaddr_t *end)
+bool get_stack_limits(vaddr_t *start, vaddr_t *end, bool hard)
{
uint32_t exceptions = thread_mask_exceptions(THREAD_EXCP_FOREIGN_INTR);
unsigned int pos = get_core_pos();
struct thread_core_local *l = get_core_local(pos);
- struct thread_ctx *thr = NULL;
- int ct = -1;
+ int ct = l->curr_thread;
+ bool ret = false;
if (l->flags & THREAD_CLF_TMP) {
- /* We're using the temporary stack for this core */
- *start = (vaddr_t)stack_tmp[pos];
- *end = *start + STACK_TMP_SIZE;
+ if (hard)
+ *start = GET_STACK_TOP_HARD(stack_tmp, pos);
+ else
+ *start = GET_STACK_TOP_SOFT(stack_tmp, pos);
+ *end = GET_STACK_BOTTOM(stack_tmp, pos);
+ ret = true;
} else if (l->flags & THREAD_CLF_ABORT) {
- /* We're using the abort stack for this core */
- *start = (vaddr_t)stack_abt[pos];
- *end = *start + STACK_ABT_SIZE;
+ if (hard)
+ *start = GET_STACK_TOP_HARD(stack_abt, pos);
+ else
+ *start = GET_STACK_TOP_SOFT(stack_abt, pos);
+ *end = GET_STACK_BOTTOM(stack_abt, pos);
+ ret = true;
} else if (!l->flags) {
- /* We're using a thread stack */
- ct = l->curr_thread;
- assert(ct >= 0 && ct < CFG_NUM_THREADS);
- thr = threads + ct;
- *end = thr->stack_va_end;
+ if (ct < 0 || ct >= CFG_NUM_THREADS)
+ goto out;
+
+ *end = threads[ct].stack_va_end;
*start = *end - STACK_THREAD_SIZE;
+ if (!hard)
+ *start += STACK_CHECK_EXTRA;
+ ret = true;
}
-
+out:
thread_unmask_exceptions(exceptions);
+ return ret;
}
bool thread_is_from_abort_mode(void)
@@ -890,7 +1008,7 @@ static void init_thread_stacks(void)
/* Assign the thread stacks */
for (n = 0; n < CFG_NUM_THREADS; n++) {
- if (!thread_init_stack(n, GET_STACK(stack_thread[n])))
+ if (!thread_init_stack(n, GET_STACK_BOTTOM(stack_thread, n)))
panic("thread_init_stack failed");
}
}
@@ -926,6 +1044,7 @@ void thread_init_threads(void)
size_t n = 0;
init_thread_stacks();
+ print_stack_limits();
pgt_init();
mutex_lockdep_init();
@@ -936,7 +1055,7 @@ void thread_init_threads(void)
}
}
-void thread_init_thread_core_local(void)
+void __nostackcheck thread_init_thread_core_local(void)
{
size_t n = 0;
struct thread_core_local *tcl = thread_core_local;
@@ -945,6 +1064,8 @@ void thread_init_thread_core_local(void)
tcl[n].curr_thread = -1;
tcl[n].flags = THREAD_CLF_TMP;
}
+
+ tcl[0].tmp_stack_va_end = GET_STACK_BOTTOM(stack_tmp, 0);
}
void thread_init_primary(void)
@@ -959,7 +1080,7 @@ static void init_sec_mon_stack(size_t pos __maybe_unused)
{
#if !defined(CFG_WITH_ARM_TRUSTED_FW)
/* Initialize secure monitor */
- sm_init(GET_STACK(stack_tmp[pos]));
+ sm_init(GET_STACK_BOTTOM(stack_tmp, pos));
#endif
}
@@ -1049,8 +1170,8 @@ void thread_init_per_cpu(void)
init_sec_mon_stack(pos);
- set_tmp_stack(l, GET_STACK(stack_tmp[pos]) - STACK_TMP_OFFS);
- set_abt_stack(l, GET_STACK(stack_abt[pos]));
+ set_tmp_stack(l, GET_STACK_BOTTOM(stack_tmp, pos) - STACK_TMP_OFFS);
+ set_abt_stack(l, GET_STACK_BOTTOM(stack_abt, pos));
thread_init_vbar(get_excp_vect());
@@ -1069,7 +1190,7 @@ struct thread_specific_data *thread_get_tsd(void)
return &threads[thread_get_id()].tsd;
}
-struct thread_ctx_regs *thread_get_ctx_regs(void)
+struct thread_ctx_regs * __nostackcheck thread_get_ctx_regs(void)
{
struct thread_core_local *l = thread_get_core_local();
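To make the new layout concrete, with the ARM32 values visible in this
patch (STACK_CANARY_SIZE = 16 bytes, STACK_CHECK_EXTRA = 1536 bytes), the
address arithmetic for a stack entry starting at address A works out as
follows (a worked example, not code from the patch):

/*
 * GET_STACK_TOP_HARD  = A + STACK_CANARY_SIZE / 2    = A + 8
 * GET_STACK_TOP_SOFT  = hard top + STACK_CHECK_EXTRA = A + 1544
 * GET_STACK_BOTTOM    = A + sizeof(stack[n]) - 8
 *
 * Normal execution must stay between the soft top and the bottom;
 * the 1536 bytes between the hard and soft tops absorb a moderate
 * overflow so the enter hook can still print the stack limits and
 * panic without trampling the lower canary.
 */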
diff --git a/core/arch/arm/kernel/unwind_arm32.c b/core/arch/arm/kernel/unwind_arm32.c
index 0d35d003..ae838530 100644
--- a/core/arch/arm/kernel/unwind_arm32.c
+++ b/core/arch/arm/kernel/unwind_arm32.c
@@ -475,7 +475,7 @@ void print_kernel_stack(int level)
*/
state.registers[PC] = (uint32_t)print_kernel_stack + 4;
- get_stack_limits(&stack_start, &stack_end);
+ get_stack_hard_limits(&stack_start, &stack_end);
print_stack_arm32(level, &state, exidx, exidx_sz, stack_start,
stack_end - stack_start);
}
diff --git a/core/arch/arm/kernel/unwind_arm64.c b/core/arch/arm/kernel/unwind_arm64.c
index adb4404b..e0974f8e 100644
--- a/core/arch/arm/kernel/unwind_arm64.c
+++ b/core/arch/arm/kernel/unwind_arm64.c
@@ -129,7 +129,7 @@ void print_kernel_stack(int level)
state.pc = read_pc();
state.fp = read_fp();
- get_stack_limits(&stack_start, &stack_end);
+ get_stack_hard_limits(&stack_start, &stack_end);
print_stack_arm64(level, &state, stack_start, stack_end - stack_start);
}
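The switch to get_stack_hard_limits() in both unwinders is what makes the
soft/hard split useful (a hypothetical overflow scenario, addresses
growing to the right):

/*
 *   hard top        soft top                     bottom
 *      |      SP       |                            |
 *      [ CHECK_EXTRA   ][       usable stack        ]
 *
 * SP has crossed the soft limit, so check_stack_limits() panics,
 * but print_kernel_stack() must still accept frames lying between
 * the hard and soft tops, hence it now queries the hard limits.
 */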
diff --git a/core/core.mk b/core/core.mk
index 52ce6885..7ac989be 100644
--- a/core/core.mk
+++ b/core/core.mk
@@ -40,6 +40,13 @@ cflags_kasan += -fsanitize=kernel-address \
--param asan-instrumentation-with-call-threshold=0
cflags$(sm) += $(cflags_kasan)
endif
+ifeq ($(CFG_CORE_DEBUG_CHECK_STACKS),y)
+finstrument-functions := $(call cc-option,-finstrument-functions)
+ifeq (,$(finstrument-functions))
+$(error -finstrument-functions not supported)
+endif
+cflags$(sm) += $(finstrument-functions)
+endif
ifeq ($(CFG_SYSCALL_FTRACE),y)
cflags$(sm) += -pg
endif
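On the build side this is an ordinary CFG_ option, so presumably it is
enabled on the make command line; the cc-option probe above then aborts
the build early, via $(error ...), when the compiler lacks
-finstrument-functions:

make CFG_CORE_DEBUG_CHECK_STACKS=y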
diff --git a/core/kernel/assert.c b/core/kernel/assert.c
index d8c23483..1d2e3c93 100644
--- a/core/kernel/assert.c
+++ b/core/kernel/assert.c
@@ -10,10 +10,10 @@
/* assert log and break for the optee kernel */
-void _assert_log(const char *expr __maybe_unused,
- const char *file __maybe_unused,
- const int line __maybe_unused,
- const char *func __maybe_unused)
+void __nostackcheck _assert_log(const char *expr __maybe_unused,
+ const char *file __maybe_unused,
+ const int line __maybe_unused,
+ const char *func __maybe_unused)
{
#if defined(CFG_TEE_CORE_DEBUG)
EMSG_RAW("assertion '%s' failed at %s:%d <%s>",