From d691f9e8d4405c334aa10d556e73c8bf44cb0e01 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Jun 2015 10:11:54 -0700 Subject: bpf: allow programs to write to certain skb fields Allow programs to read/write the skb->mark and tc_index fields and ((struct qdisc_skb_cb *)cb)->data. mark and tc_index are generically useful in TC. cb[0]-cb[4] are primarily used to pass arguments from one program to another called via bpf_tail_call(), as can be seen in the sockex3_kern.c example. All fields of 'struct __sk_buff' are readable by socket and tc_cls_act programs. mark and tc_index are writeable from tc_cls_act only. cb[0]-cb[4] are writeable by both sockets and tc_cls_act. Add verifier tests and improve sample code. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 12 deletions(-) (limited to 'net/core/filter.c') diff --git a/net/core/filter.c b/net/core/filter.c index 36a69e33d76b..d271c06bf01f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -46,6 +46,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1463,13 +1464,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) } } -static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) +static bool __is_valid_access(int off, int size, enum bpf_access_type type) { - /* only read is allowed */ - if (type != BPF_READ) - return false; - /* check bounds */ if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -1485,8 +1481,42 @@ static bool sk_filter_is_valid_access(int off, int size, return true; } -static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) +static bool sk_filter_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, cb[0]) ... 
+ offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + + return __is_valid_access(off, size, type); +} + +static bool tc_cls_act_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, mark): + case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + return __is_valid_access(off, size, type); +} + +static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf) { struct bpf_insn *insn = insn_buf; @@ -1538,7 +1568,15 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, break; case offsetof(struct __sk_buff, mark): - return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; case offsetof(struct __sk_buff, pkt_type): return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); @@ -1553,6 +1591,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, case offsetof(struct __sk_buff, vlan_tci): return convert_skb_access(SKF_AD_VLAN_TAG, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, cb[0]) ... 
+ offsetof(struct __sk_buff, cb[4]): + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + + ctx_off -= offsetof(struct __sk_buff, cb[0]); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, data); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + break; + + case offsetof(struct __sk_buff, tc_index): +#ifdef CONFIG_NET_SCHED + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + else + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + break; +#else + if (type == BPF_WRITE) + *insn++ = BPF_MOV64_REG(dst_reg, dst_reg); + else + *insn++ = BPF_MOV64_IMM(dst_reg, 0); + break; +#endif } return insn - insn_buf; @@ -1561,13 +1631,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, - .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .is_valid_access = tc_cls_act_is_valid_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static struct bpf_prog_type_list sk_filter_type __read_mostly = { -- cgit v1.2.3