author     Kees Cook <kees.cook@canonical.com>               2010-05-25 09:51:25 -0700
committer  Leann Ogasawara <leann.ogasawara@canonical.com>   2010-06-14 08:51:21 -0700
commit     1d5c7285752a9ae7efe4117fd4b2014ba9375b56 (patch)
tree       f75c7581e08ca4ac65caa6c2c2c512885441ca5a /arch
parent     13527fe6f2a3924ce49c7a54e76378ae0343a7f7 (diff)
UBUNTU: SAUCE: x86: implement cs-limit nx-emulation for ia32
OriginalAuthor: Kyle McMartin <kyle@redhat.com>, Dave Jones <djones@redhat.com>, Solar Designer <solar at openwall.com>
OriginalLocation: http://cvs.fedoraproject.org/viewvc/devel/kernel/linux-2.6-execshield.patch?view=log
Bug: #369978

This is a refresh from version 1.117 as carried by the Fedora Project.
It implements NX emulation via CS limits, closing a gap in the security
protections on ia32 kernels without PAE and on ia32 hardware that lacks
the NX feature. Upstream feels this NX emulation is not appropriate for
mainline, so Red Hat and others have carried it in their kernels for a
long time now.

Also reference https://blueprints.edge.launchpad.net/ubuntu/+spec/use-pae-when-possible

Signed-off-by: Kees Cook <kees.cook@canonical.com>
Signed-off-by: Leann Ogasawara <leann.ogasawara@canonical.com>
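The core of the trick is in set_user_cs() in the desc.h hunk below: the end of the highest PROT_EXEC mapping is rounded to pages and packed into the 20-bit, page-granular code-segment limit, so any instruction fetch at or above that address raises #GP. Here is a minimal userspace sketch of the same descriptor math; PAGE_SIZE and the flag constant 0x00c0fb00 are copied from the patch, everything else (including the sample address) is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* Same math as the patch's set_user_cs(): a page-granular 20-bit
     * limit split across the two descriptor words. 0x00c0fb00 encodes a
     * present, DPL-3, 32-bit code segment with 4K granularity. */
    static void split_cs_limit(unsigned long limit, uint32_t *a, uint32_t *b)
    {
            limit = (limit - 1) / PAGE_SIZE;
            *a = limit & 0xffff;                  /* limit bits 15..0 */
            *b = (limit & 0xf0000) | 0x00c0fb00;  /* limit bits 19..16 + flags */
    }

    int main(void)
    {
            uint32_t a, b;
            split_cs_limit(0x08050000UL, &a, &b); /* a made-up exec-mapping end */
            printf("desc.a=%08x desc.b=%08x\n", a, b);
            return 0;
    }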
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/desc.h             25
-rw-r--r--  arch/x86/include/asm/mmu.h               7
-rw-r--r--  arch/x86/include/asm/paravirt.h          6
-rw-r--r--  arch/x86/include/asm/paravirt_types.h    3
-rw-r--r--  arch/x86/include/asm/processor.h         3
-rw-r--r--  arch/x86/kernel/cpu/common.c            14
-rw-r--r--  arch/x86/kernel/paravirt.c               3
-rw-r--r--  arch/x86/kernel/process_32.c            48
-rw-r--r--  arch/x86/kernel/traps.c                131
-rw-r--r--  arch/x86/mm/mmap.c                       5
-rw-r--r--  arch/x86/mm/setup_nx.c                   6
-rw-r--r--  arch/x86/mm/tlb.c                        7
-rw-r--r--  arch/x86/vdso/vdso32-setup.c             2
-rw-r--r--  arch/x86/xen/enlighten.c                21
14 files changed, 270 insertions, 11 deletions
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 617bd56b307..526248dd31e 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -5,6 +5,7 @@
#include <asm/ldt.h>
#include <asm/mmu.h>
#include <linux/smp.h>
+#include <linux/mm_types.h>
static inline void fill_ldt(struct desc_struct *desc,
const struct user_desc *info)
@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)
#define load_TLS(t, cpu) native_load_tls(t, cpu)
#define set_ldt native_set_ldt
+#ifdef CONFIG_X86_32
+#define load_user_cs_desc native_load_user_cs_desc
+#endif /*CONFIG_X86_32*/
#define write_ldt_entry(dt, entry, desc) \
native_write_ldt_entry(dt, entry, desc)
@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
}
+#ifdef CONFIG_X86_32
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
+{
+ limit = (limit - 1) / PAGE_SIZE;
+ desc->a = limit & 0xffff;
+ desc->b = (limit & 0xf0000) | 0x00c0fb00;
+}
+
+static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+ get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;
+}
+
+#define arch_add_exec_range arch_add_exec_range
+#define arch_remove_exec_range arch_remove_exec_range
+#define arch_flush_exec_range arch_flush_exec_range
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_flush_exec_range(struct mm_struct *mm);
+#endif /* CONFIG_X86_32 */
+
#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 80a1dee5bea..8314c66c7c3 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -7,12 +7,19 @@
/*
* The x86 doesn't have a mmu context, but
* we put the segment information here.
+ *
+ * exec_limit is used to track the range PROT_EXEC
+ * mappings span.
*/
typedef struct {
void *ldt;
int size;
struct mutex lock;
void *vdso;
+#ifdef CONFIG_X86_32
+ struct desc_struct user_cs;
+ unsigned long exec_limit;
+#endif
} mm_context_t;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 5653f43d90e..55dadb2a61b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
+#ifdef CONFIG_X86_32
+static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)
+{
+ PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);
+}
+#endif /*CONFIG_X86_32*/
static inline void store_gdt(struct desc_ptr *dtr)
{
PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index db9ef553234..19c2793fb3c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -118,6 +118,9 @@ struct pv_cpu_ops {
void (*store_gdt)(struct desc_ptr *);
void (*store_idt)(struct desc_ptr *);
void (*set_ldt)(const void *desc, unsigned entries);
+#ifdef CONFIG_X86_32
+ void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
+#endif
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7e5c6a60b8e..9de0cba46d9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -158,6 +158,9 @@ static inline int hlt_works(int cpu)
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
+#define __HAVE_ARCH_ALIGN_STACK
+extern unsigned long arch_align_stack(unsigned long sp);
+
extern void cpu_detect(struct cpuinfo_x86 *c);
extern struct pt_regs *idle_regs(struct pt_regs *);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 68e4a6f2211..40f5bb7e8a4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -802,6 +802,20 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Filter out anything that depends on CPUID levels we don't have */
filter_cpuid_features(c, true);
+ /*
+ * emulation of NX with segment limits unfortunately means
+ * we have to disable the fast system calls, due to the way that
+ * sysexit clears the segment limits on return.
+ * If we have either disabled exec-shield on the boot command line,
+ * or we have NX, then we don't need to do this.
+ */
+ if (exec_shield != 0) {
+#ifdef CONFIG_X86_PAE
+ if (!test_cpu_cap(c, X86_FEATURE_NX))
+#endif
+ clear_cpu_cap(c, X86_FEATURE_SEP);
+ }
+
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
const char *p;
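The SEP clearing above matters because, as the comment notes, sysexit reloads the user code segment with a fixed flat descriptor, silently undoing the truncated limit; with hardware NX (the PAE case) the emulation is unneeded and sysenter/sysexit can stay. A rough userspace check of the visible effect, assuming the Linux /proc/cpuinfo format: on a patched kernel without hardware NX, "sep" should be absent from the flags line even on sysenter-capable CPUs.

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[4096];
            FILE *f = fopen("/proc/cpuinfo", "r");
            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f)) {
                    if (!strncmp(line, "flags", 5)) {
                            /* crude substring probes; fine for a sketch */
                            printf("nx : %s\n", strstr(line, " nx") ? "yes" : "no");
                            printf("sep: %s\n", strstr(line, " sep") ? "yes" : "no");
                            break;
                    }
            }
            fclose(f);
            return 0;
    }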
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1db183ed7c0..238b97d2415 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {
.read_tscp = native_read_tscp,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
+#ifdef CONFIG_X86_32
+ .load_user_cs_desc = native_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
.load_gdt = native_load_gdt,
.load_idt = native_load_idt,
.store_gdt = native_store_gdt,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af4..fde71dfe9cd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -243,7 +243,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
+ int cpu;
+
set_user_gs(regs, 0);
+
regs->fs = 0;
set_fs(USER_DS);
regs->ds = __USER_DS;
@@ -252,6 +255,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
+
+ cpu = get_cpu();
+ load_user_cs_desc(cpu, current->mm);
+ put_cpu();
+
/*
* Free the old FP and other extended state
*/
@@ -311,6 +319,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (preload_fpu)
prefetch(next->fpu.state);
+ if (next_p->mm)
+ load_user_cs_desc(cpu, next_p->mm);
+
/*
* Reload esp0.
*/
@@ -404,3 +415,40 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
+static void modify_cs(struct mm_struct *mm, unsigned long limit)
+{
+ mm->context.exec_limit = limit;
+ set_user_cs(&mm->context.user_cs, limit);
+ if (mm == current->mm) {
+ int cpu;
+
+ cpu = get_cpu();
+ load_user_cs_desc(cpu, mm);
+ put_cpu();
+ }
+}
+
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
+{
+ if (limit > mm->context.exec_limit)
+ modify_cs(mm, limit);
+}
+
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
+{
+ struct vm_area_struct *vma;
+ unsigned long limit = PAGE_SIZE;
+
+ if (old_end == mm->context.exec_limit) {
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+ limit = vma->vm_end;
+ modify_cs(mm, limit);
+ }
+}
+
+void arch_flush_exec_range(struct mm_struct *mm)
+{
+ mm->context.exec_limit = 0;
+ set_user_cs(&mm->context.user_cs, 0);
+}
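arch_add_exec_range()/arch_remove_exec_range() above keep context.exec_limit equal to the end of the highest VM_EXEC mapping and reload the per-CPU GDT entry whenever the current mm is affected. A hypothetical userspace probe of the net effect (entirely illustrative; build 32-bit, e.g. with gcc -m32): a jump into a PROT_READ|PROT_WRITE page that lands above the exec limit should die with SIGSEGV instead of executing.

    #include <sys/mman.h>
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
            static const unsigned char ret_insn = 0xc3;  /* x86 `ret` */
            void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (buf == MAP_FAILED)
                    return 1;
            memcpy(buf, &ret_insn, 1);
            printf("jumping into a non-exec page...\n");
            ((void (*)(void))buf)(); /* expect SIGSEGV if the page is above the CS limit */
            printf("executed: page was reachable through CS\n");
            return 0;
    }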
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 142d70c74b0..82a48b87d82 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -109,6 +109,78 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
dec_preempt_count();
}
+#ifdef CONFIG_X86_32
+static inline int
+__compare_user_cs_desc(const struct desc_struct *desc1,
+ const struct desc_struct *desc2)
+{
+ return ((desc1->limit0 != desc2->limit0) ||
+ (desc1->limit != desc2->limit) ||
+ (desc1->base0 != desc2->base0) ||
+ (desc1->base1 != desc2->base1) ||
+ (desc1->base2 != desc2->base2));
+}
+
+/*
+ * lazy-check for CS validity on exec-shield binaries:
+ *
+ * the original non-exec stack patch was written by
+ * Solar Designer <solar at openwall.com>. Thanks!
+ */
+static int
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
+{
+ struct desc_struct *desc1, *desc2;
+ struct vm_area_struct *vma;
+ unsigned long limit;
+
+ if (current->mm == NULL)
+ return 0;
+
+ limit = -1UL;
+ if (current->mm->context.exec_limit != -1UL) {
+ limit = PAGE_SIZE;
+ spin_lock(&current->mm->page_table_lock);
+ for (vma = current->mm->mmap; vma; vma = vma->vm_next)
+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+ limit = vma->vm_end;
+ vma = get_gate_vma(current);
+ if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+ limit = vma->vm_end;
+ spin_unlock(&current->mm->page_table_lock);
+ if (limit >= TASK_SIZE)
+ limit = -1UL;
+ current->mm->context.exec_limit = limit;
+ }
+ set_user_cs(&current->mm->context.user_cs, limit);
+
+ desc1 = &current->mm->context.user_cs;
+ desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
+
+ if (__compare_user_cs_desc(desc1, desc2)) {
+ /*
+ * The CS was not in sync - reload it and retry the
+ * instruction. If the instruction still faults then
+ * we won't hit this branch next time around.
+ */
+ if (print_fatal_signals >= 2) {
+ printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+ error_code, error_code/8, regs->ip,
+ smp_processor_id());
+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
+ current->mm->context.exec_limit,
+ desc1->a, desc1->b, desc2->a, desc2->b);
+ }
+
+ load_user_cs_desc(cpu, current->mm);
+
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
static void __kprobes
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
long error_code, siginfo_t *info)
@@ -265,6 +337,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
if (!user_mode(regs))
goto gp_in_kernel;
+#ifdef CONFIG_X86_32
+{
+ int cpu;
+ int ok;
+
+ cpu = get_cpu();
+ ok = check_lazy_exec_limit(cpu, regs, error_code);
+ put_cpu();
+
+ if (ok)
+ return;
+
+ if (print_fatal_signals) {
+ printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+ error_code, error_code/8, regs->ip, smp_processor_id());
+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
+ current->mm->context.exec_limit,
+ current->mm->context.user_cs.a,
+ current->mm->context.user_cs.b);
+ }
+}
+#endif /*CONFIG_X86_32*/
+
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 13;
@@ -792,19 +887,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
}
#ifdef CONFIG_X86_32
+/*
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
+ * the original trap number and error code. The bogus trap 32 and error
+ * code 0 are what the vanilla kernel delivers via:
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+ *
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
+ *
+ * In case of a general protection fault in the iret instruction, we
+ * need to check for a lazy CS update for exec-shield.
+ */
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
- siginfo_t info;
+ int ok;
+ int cpu;
+
local_irq_enable();
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_BADSTK;
- info.si_addr = NULL;
- if (notify_die(DIE_TRAP, "iret exception",
- regs, error_code, 32, SIGILL) == NOTIFY_STOP)
- return;
- do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
+ cpu = get_cpu();
+ ok = check_lazy_exec_limit(cpu, regs, error_code);
+ put_cpu();
+
+ if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
+ error_code, 32, SIGSEGV) != NOTIFY_STOP) {
+ siginfo_t info;
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = ILL_BADSTK;
+ info.si_addr = 0;
+ do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
+ }
}
#endif
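In both #GP printouts above, error_code/8 is meaningful because a segment-related #GP pushes the faulting selector as its error code; shifting away the low three bits (TI and RPL) leaves the descriptor-table index. A standalone decode of that encoding, with an arbitrary example value:

    #include <stdio.h>

    /* x86 selector layout: index << 3 | TI << 2 | RPL, so the
     * error_code/8 printed by the handler is the table index. */
    static void decode_selector(unsigned long error_code)
    {
            printf("index=%lu table=%s rpl=%lu\n",
                   error_code >> 3,
                   (error_code & 4) ? "LDT" : "GDT",
                   error_code & 3);
    }

    int main(void)
    {
            decode_selector(0x73); /* example value only */
            return 0;
    }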
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1dab5194fd9..360f39d585f 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -124,13 +124,16 @@ static unsigned long mmap_legacy_base(void)
*/
void arch_pick_mmap_layout(struct mm_struct *mm)
{
- if (mmap_is_legacy()) {
+ if (!(2 & exec_shield) && mmap_is_legacy()) {
mm->mmap_base = mmap_legacy_base();
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ if (!(current->personality & READ_IMPLIES_EXEC)
+ && mmap_is_ia32())
+ mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
mm->unmap_area = arch_unmap_area_topdown;
}
}
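With get_unmapped_exec_area wired up for 32-bit tasks (unless the personality demands READ_IMPLIES_EXEC), PROT_EXEC mappings are steered low so they stay under the CS limit, while plain data mappings keep the usual top-down placement. A hypothetical probe of that split, again assuming a 32-bit build on a kernel carrying this patch:

    #include <sys/mman.h>
    #include <stdio.h>

    int main(void)
    {
            void *exec = mmap(NULL, 4096, PROT_READ | PROT_EXEC,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            void *data = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            /* On a patched kernel the exec mapping should sit well below
             * the data mapping; on stock kernels the two are adjacent. */
            printf("exec: %p\ndata: %p\n", exec, data);
            return 0;
    }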
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index a3250aa3408..e0d9cce0338 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -1,3 +1,4 @@
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/init.h>
@@ -23,6 +24,7 @@ static int __init noexec_setup(char *str)
disable_nx = 0;
} else if (!strncmp(str, "off", 3)) {
disable_nx = 1;
+ exec_shield = 0;
}
x86_configure_nx();
return 0;
@@ -40,6 +42,10 @@ void __cpuinit x86_configure_nx(void)
void __init x86_report_nx(void)
{
if (!cpu_has_nx) {
+ if (exec_shield)
+ printk(KERN_INFO "Using x86 segment limits to approximate NX protection\n");
+ else
+
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
"missing in CPU or disabled in BIOS!\n");
} else {
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 426f3a1a64d..e0286b18d49 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
@@ -131,6 +132,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
union smp_flush_state *f;
cpu = smp_processor_id();
+
+#ifdef CONFIG_X86_32
+ if (current->active_mm)
+ load_user_cs_desc(cpu, current->active_mm);
+#endif
+
/*
* orig_rax contains the negated interrupt vector.
* Use that to determine where the sender put the data.
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 02b442e9200..957bb679807 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (compat)
addr = VDSO_HIGH_BASE;
else {
- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+ addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 65d8d79b46a..1ea06f842a9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -335,6 +335,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
+#ifdef CONFIG_X86_32
+static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+ void *gdt;
+ xmaddr_t mgdt;
+ u64 descriptor;
+ struct desc_struct user_cs;
+
+ gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
+ mgdt = virt_to_machine(gdt);
+
+ user_cs = mm->context.user_cs;
+ descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32;
+
+ HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);
+}
+#endif /*CONFIG_X86_32*/
+
static void xen_load_gdt(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
@@ -961,6 +979,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
+#ifdef CONFIG_X86_32
+ .load_user_cs_desc = xen_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
.load_gdt = xen_load_gdt,
.load_idt = xen_load_idt,
.load_tls = xen_load_tls,
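
Xen guests cannot write their hypervisor-shadowed GDT directly, so xen_load_user_cs_desc() above repacks the two 32-bit descriptor words into the single u64 that HYPERVISOR_update_descriptor() expects. The packing itself, isolated into a runnable snippet (the sample words are arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t a = 0x0000ffff; /* limit 15..0 / base 15..0 word */
            uint32_t b = 0x00cffb00; /* flags + limit 19..16 word */
            uint64_t descriptor = (uint64_t)a | ((uint64_t)b << 32);
            printf("descriptor = %016llx\n", (unsigned long long)descriptor);
            return 0;
    }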