diff options
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r-- | kernel/events/core.c | 180 |
1 files changed, 104 insertions, 76 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index ea4f3f7a0c6f..f87d54270076 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -397,8 +397,13 @@ static cpumask_var_t perf_online_mask; * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv + * 3 - disallow all unpriv perf event use */ +#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT +int sysctl_perf_event_paranoid __read_mostly = 3; +#else int sysctl_perf_event_paranoid __read_mostly = 2; +#endif /* Minimum for 512 kiB + 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ @@ -1217,6 +1222,7 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::lock * perf_event::mmap_mutex * mmap_sem + * perf_addr_filters_head::lock */ static struct perf_event_context * perf_event_ctx_lock_nested(struct perf_event *event, int nesting) @@ -2723,7 +2729,7 @@ static int perf_event_stop(struct perf_event *event, int restart) * * (p1) when userspace mappings change as a result of (1) or (2) or (3) below, * we update the addresses of corresponding vmas in - * event::addr_filters_offs array and bump the event::addr_filters_gen; + * event::addr_filter_ranges array and bump the event::addr_filters_gen; * (p2) when an event is scheduled in (pmu::add), it calls * perf_event_addr_filters_sync() which calls pmu::addr_filters_sync() * if the generation has changed since the previous call. @@ -4232,7 +4238,7 @@ static void _free_event(struct perf_event *event) perf_event_free_bpf_prog(event); perf_addr_filters_splice(event, NULL); - kfree(event->addr_filters_offs); + kfree(event->addr_filter_ranges); if (event->destroy) event->destroy(event); @@ -6430,8 +6436,9 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - if (filter->inode) { - event->addr_filters_offs[count] = 0; + if (filter->path.dentry) { + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; restart++; } @@ -7097,7 +7104,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, struct file *file, unsigned long offset, unsigned long size) { - if (filter->inode != file_inode(file)) + if (d_inode(filter->path.dentry) != file_inode(file)) return false; if (filter->offset > offset + size) @@ -7109,28 +7116,47 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, return true; } +static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter, + struct vm_area_struct *vma, + struct perf_addr_filter_range *fr) +{ + unsigned long vma_size = vma->vm_end - vma->vm_start; + unsigned long off = vma->vm_pgoff << PAGE_SHIFT; + struct file *file = vma->vm_file; + + if (!perf_addr_filter_match(filter, file, off, vma_size)) + return false; + + if (filter->offset < off) { + fr->start = vma->vm_start; + fr->size = min(vma_size, filter->size - (off - filter->offset)); + } else { + fr->start = vma->vm_start + filter->offset - off; + fr->size = min(vma->vm_end - fr->start, filter->size); + } + + return true; +} + static void __perf_addr_filters_adjust(struct perf_event *event, void *data) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); struct vm_area_struct *vma = data; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags; - struct file *file = vma->vm_file; struct perf_addr_filter *filter; unsigned int restart = 0, count = 0; + unsigned long flags; if (!has_addr_filter(event)) return; - if (!file) + if (!vma->vm_file) return; raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - if (perf_addr_filter_match(filter, file, off, - vma->vm_end - vma->vm_start)) { - event->addr_filters_offs[count] = vma->vm_start; + if (perf_addr_filter_vma_adjust(filter, vma, + &event->addr_filter_ranges[count])) restart++; - } count++; } @@ -8011,11 +8037,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task) { - struct bpf_prog *prog = call->prog; - - if (prog) { + if (bpf_prog_array_valid(call)) { *(struct pt_regs **)raw_data = regs; - if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) { + if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; } @@ -8205,13 +8229,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { bool is_kprobe, is_tracepoint, is_syscall_tp; struct bpf_prog *prog; + int ret; if (event->attr.type != PERF_TYPE_TRACEPOINT) return perf_event_set_bpf_handler(event, prog_fd); - if (event->tp_event->prog) - return -EEXIST; - is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; is_syscall_tp = is_syscall_trace_event(event->tp_event); @@ -8239,26 +8261,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EACCES; } } - event->tp_event->prog = prog; - event->tp_event->bpf_prog_owner = event; - return 0; + ret = perf_event_attach_bpf_prog(event, prog); + if (ret) + bpf_prog_put(prog); + return ret; } static void perf_event_free_bpf_prog(struct perf_event *event) { - struct bpf_prog *prog; - - perf_event_free_bpf_handler(event); - - if (!event->tp_event) + if (event->attr.type != PERF_TYPE_TRACEPOINT) { + perf_event_free_bpf_handler(event); return; - - prog = event->tp_event->prog; - if (prog && event->tp_event->bpf_prog_owner == event) { - event->tp_event->prog = NULL; - bpf_prog_put(prog); } + perf_event_detach_bpf_prog(event); } #else @@ -8318,8 +8334,7 @@ static void free_filters_list(struct list_head *filters) struct perf_addr_filter *filter, *iter; list_for_each_entry_safe(filter, iter, filters, entry) { - if (filter->inode) - iput(filter->inode); + path_put(&filter->path); list_del(&filter->entry); kfree(filter); } @@ -8357,26 +8372,19 @@ static void perf_addr_filters_splice(struct perf_event *event, * @filter; if so, adjust filter's address range. * Called with mm::mmap_sem down for reading. */ -static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter, - struct mm_struct *mm) +static void perf_addr_filter_apply(struct perf_addr_filter *filter, + struct mm_struct *mm, + struct perf_addr_filter_range *fr) { struct vm_area_struct *vma; for (vma = mm->mmap; vma; vma = vma->vm_next) { - struct file *file = vma->vm_file; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT; - unsigned long vma_size = vma->vm_end - vma->vm_start; - - if (!file) + if (!vma->vm_file) continue; - if (!perf_addr_filter_match(filter, file, off, vma_size)) - continue; - - return vma->vm_start; + if (perf_addr_filter_vma_adjust(filter, vma, fr)) + return; } - - return 0; } /* @@ -8399,26 +8407,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event) if (task == TASK_TOMBSTONE) return; - if (!ifh->nr_file_filters) - return; - - mm = get_task_mm(event->ctx->task); - if (!mm) - goto restart; + if (ifh->nr_file_filters) { + mm = get_task_mm(event->ctx->task); + if (!mm) + goto restart; - down_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); + } raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - event->addr_filters_offs[count] = 0; + if (filter->path.dentry) { + /* + * Adjust base offset if the filter is associated to a + * binary that needs to be mapped: + */ + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; - /* - * Adjust base offset if the filter is associated to a binary - * that needs to be mapped: - */ - if (filter->inode) - event->addr_filters_offs[count] = - perf_addr_filter_apply(filter, mm); + perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); + } else { + event->addr_filter_ranges[count].start = filter->offset; + event->addr_filter_ranges[count].size = filter->size; + } count++; } @@ -8426,9 +8437,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); - up_read(&mm->mmap_sem); + if (ifh->nr_file_filters) { + up_read(&mm->mmap_sem); - mmput(mm); + mmput(mm); + } restart: perf_event_stop(event, 1); @@ -8489,7 +8502,6 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, { struct perf_addr_filter *filter = NULL; char *start, *orig, *filename = NULL; - struct path path; substring_t args[MAX_OPT_ARGS]; int state = IF_STATE_ACTION, token; unsigned int kernel = 0; @@ -8593,19 +8605,18 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, goto fail_free_name; /* look up the path and grab its inode */ - ret = kern_path(filename, LOOKUP_FOLLOW, &path); + ret = kern_path(filename, LOOKUP_FOLLOW, + &filter->path); if (ret) goto fail_free_name; - filter->inode = igrab(d_inode(path.dentry)); - path_put(&path); kfree(filename); filename = NULL; ret = -EINVAL; - if (!filter->inode || - !S_ISREG(filter->inode->i_mode)) - /* free_filters_list() will iput() */ + if (!filter->path.dentry || + !S_ISREG(d_inode(filter->path.dentry) + ->i_mode)) goto fail; event->addr_filters.nr_file_filters++; @@ -9648,14 +9659,28 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_pmu; if (has_addr_filter(event)) { - event->addr_filters_offs = kcalloc(pmu->nr_addr_filters, - sizeof(unsigned long), - GFP_KERNEL); - if (!event->addr_filters_offs) { + event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters, + sizeof(struct perf_addr_filter_range), + GFP_KERNEL); + if (!event->addr_filter_ranges) { err = -ENOMEM; goto err_per_task; } + /* + * Clone the parent's vma offsets: they are valid until exec() + * even if the mm is not shared with the parent. + */ + if (event->parent) { + struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); + + raw_spin_lock_irq(&ifh->lock); + memcpy(event->addr_filter_ranges, + event->parent->addr_filter_ranges, + pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range)); + raw_spin_unlock_irq(&ifh->lock); + } + /* force hw sync on the address filters */ event->addr_filters_gen = 1; } @@ -9674,7 +9699,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, return event; err_addr_filters: - kfree(event->addr_filters_offs); + kfree(event->addr_filter_ranges); err_per_task: exclusive_event_destroy(event); @@ -10002,6 +10027,9 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; + if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; |