author     Linus Torvalds <torvalds@linux-foundation.org>  2019-05-07 22:03:58 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-05-07 22:03:58 -0700
commit     80f232121b69cc69a31ccb2b38c1665d770b0710
tree       106263eac4ff03b899df695e00dd11e593e74fe2
parent     82efe439599439a5e1e225ce5740e6cfb777a7dd
parent     a9e41a529681b38087c91ebc0bb91e12f510ca2d
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Highlights:

  1) Support AES128-CCM ciphers in kTLS, from Vakul Garg.

  2) Add fib_sync_mem to control the amount of dirty memory we allow to queue up between synchronize RCU calls, from David Ahern.

  3) Make flow classifier more lockless, from Vlad Buslov.

  4) Add PHY downshift support to aquantia driver, from Heiner Kallweit.

  5) Add SKB cache for TCP rx and tx, from Eric Dumazet. This reduces contention on SLAB spinlocks in heavy RPC workloads.

  6) Partial GSO offload support in XFRM, from Boris Pismenny.

  7) Add fast link down support to ethtool, from Heiner Kallweit.

  8) Use siphash for IP ID generator, from Eric Dumazet.

  9) Pull nexthops even further out from ipv4/ipv6 routes and FIB entries, from David Ahern.

 10) Move skb->xmit_more into a per-cpu variable, from Florian Westphal.

 11) Improve eBPF verifier speed and increase maximum program size, from Alexei Starovoitov.

 12) Eliminate per-bucket spinlocks in rhashtable, and instead use bit spinlocks. From Neil Brown.

 13) Allow tunneling with GUE encap in ipvs, from Jacky Hu.

 14) Improve link partner cap detection in generic PHY code, from Heiner Kallweit.

 15) Add layer 2 encap support to bpf_skb_adjust_room(), from Alan Maguire.

 16) Remove SKB list implementation assumptions in SCTP, yours truly.

 17) Various cleanups, optimizations, and simplifications in r8169 driver. From Heiner Kallweit.

 18) Add memory accounting on TX and RX path of SCTP, from Xin Long.

 19) Switch PHY drivers over to use dynamic feature detection, from Heiner Kallweit.

 20) Support flow steering without masking in dpaa2-eth, from Ioana Ciocoi.

 21) Implement ndo_get_devlink_port in netdevsim driver, from Jiri Pirko.

 22) Increase the strict parsing of current and future netlink attributes, also export such policies to userspace. From Johannes Berg.

 23) Allow DSA tag drivers to be modular, from Andrew Lunn.

 24) Remove legacy DSA probing support, also from Andrew Lunn.

 25) Allow ll_temac driver to be used on non-x86 platforms, from Esben Haabendal.

 26) Add a generic tracepoint for TX queue timeouts to ease debugging, from Cong Wang.

 27) More indirect call optimizations, from Paolo Abeni"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1763 commits)
  cxgb4: Fix error path in cxgb4_init_module
  net: phy: improve pause mode reporting in phy_print_status
  dt-bindings: net: Fix a typo in the phy-mode list for ethernet bindings
  net: macb: Change interrupt and napi enable order in open
  net: ll_temac: Improve error message on error IRQ
  net/sched: remove block pointer from common offload structure
  net: ethernet: support of_get_mac_address new ERR_PTR error
  net: usb: smsc: fix warning reported by kbuild test robot
  staging: octeon-ethernet: Fix of_get_mac_address ERR_PTR check
  net: dsa: support of_get_mac_address new ERR_PTR error
  net: dsa: sja1105: Fix status initialization in sja1105_get_ethtool_stats
  vrf: sit mtu should not be updated when vrf netdev is the link
  net: dsa: Fix error cleanup path in dsa_init_module
  l2tp: Fix possible NULL pointer dereference
  taprio: add null check on sched_nest to avoid potential null pointer dereference
  net: mvpp2: cls: fix less than zero check on a u32 variable
  net_sched: sch_fq: handle non connected flows
  net_sched: sch_fq: do not assume EDT packets are ordered
  net: hns3: use devm_kcalloc when allocating desc_cb
  net: hns3: some cleanup for struct hns3_enet_ring
  ...
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bpf-cgroup.h | 21
-rw-r--r--  include/linux/bpf.h | 84
-rw-r--r--  include/linux/bpf_types.h | 3
-rw-r--r--  include/linux/bpf_verifier.h | 32
-rw-r--r--  include/linux/btf.h | 1
-rw-r--r--  include/linux/dsa/8021q.h | 76
-rw-r--r--  include/linux/dsa/sja1105.h | 40
-rw-r--r--  include/linux/etherdevice.h | 2
-rw-r--r--  include/linux/filter.h | 16
-rw-r--r--  include/linux/genl_magic_func.h | 4
-rw-r--r--  include/linux/ieee80211.h | 14
-rw-r--r--  include/linux/if_bridge.h | 3
-rw-r--r--  include/linux/inetdevice.h | 14
-rw-r--r--  include/linux/jiffies.h | 1
-rw-r--r--  include/linux/marvell_phy.h | 1
-rw-r--r--  include/linux/mdio.h | 3
-rw-r--r--  include/linux/mlx5/cq.h | 2
-rw-r--r--  include/linux/mlx5/device.h | 11
-rw-r--r--  include/linux/mlx5/doorbell.h | 39
-rw-r--r--  include/linux/mlx5/driver.h | 13
-rw-r--r--  include/linux/mlx5/eswitch.h | 2
-rw-r--r--  include/linux/mlx5/fs.h | 7
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 183
-rw-r--r--  include/linux/mlx5/port.h | 1
-rw-r--r--  include/linux/mlx5/qp.h | 1
-rw-r--r--  include/linux/mlx5/transobj.h | 3
-rw-r--r--  include/linux/mlx5/vport.h | 4
-rw-r--r--  include/linux/mmc/sdio_func.h | 12
-rw-r--r--  include/linux/mmc/sdio_ids.h | 2
-rw-r--r--  include/linux/net.h | 2
-rw-r--r--  include/linux/netdevice.h | 95
-rw-r--r--  include/linux/netfilter.h | 18
-rw-r--r--  include/linux/netfilter/ipset/ip_set.h | 11
-rw-r--r--  include/linux/netfilter/nfnetlink_osf.h | 11
-rw-r--r--  include/linux/netfilter/x_tables.h | 1
-rw-r--r--  include/linux/netfilter_ipv6.h | 15
-rw-r--r--  include/linux/packing.h | 49
-rw-r--r--  include/linux/phy.h | 14
-rw-r--r--  include/linux/platform_data/macb.h | 9
-rw-r--r--  include/linux/platform_data/mv88e6xxx.h | 1
-rw-r--r--  include/linux/platform_data/xilinx-ll-temac.h | 32
-rw-r--r--  include/linux/rhashtable-types.h | 2
-rw-r--r--  include/linux/rhashtable.h | 358
-rw-r--r--  include/linux/siphash.h | 5
-rw-r--r--  include/linux/skbuff.h | 48
-rw-r--r--  include/linux/tracepoint-defs.h | 1
-rw-r--r--  include/net/addrconf.h | 55
-rw-r--r--  include/net/arp.h | 8
-rw-r--r--  include/net/bluetooth/hci.h | 1
-rw-r--r--  include/net/bluetooth/hci_core.h | 3
-rw-r--r--  include/net/bpf_sk_storage.h | 13
-rw-r--r--  include/net/cfg80211.h | 104
-rw-r--r--  include/net/compat.h | 3
-rw-r--r--  include/net/devlink.h | 534
-rw-r--r--  include/net/dsa.h | 223
-rw-r--r--  include/net/dst.h | 11
-rw-r--r--  include/net/fib_notifier.h | 3
-rw-r--r--  include/net/flow_dissector.h | 7
-rw-r--r--  include/net/flow_offload.h | 23
-rw-r--r--  include/net/fq_impl.h | 18
-rw-r--r--  include/net/genetlink.h | 34
-rw-r--r--  include/net/geneve.h | 2
-rw-r--r--  include/net/ife.h | 1
-rw-r--r--  include/net/ip.h | 4
-rw-r--r--  include/net/ip6_fib.h | 62
-rw-r--r--  include/net/ip6_route.h | 20
-rw-r--r--  include/net/ip_fib.h | 124
-rw-r--r--  include/net/ip_vs.h | 5
-rw-r--r--  include/net/ipv6_frag.h | 1
-rw-r--r--  include/net/ipv6_stubs.h | 68
-rw-r--r--  include/net/lwtunnel.h | 7
-rw-r--r--  include/net/mac80211.h | 30
-rw-r--r--  include/net/ndisc.h | 40
-rw-r--r--  include/net/neighbour.h | 7
-rw-r--r--  include/net/netfilter/ipv4/nf_nat_masquerade.h | 15
-rw-r--r--  include/net/netfilter/ipv6/nf_nat_masquerade.h | 11
-rw-r--r--  include/net/netfilter/nf_conntrack_expect.h | 2
-rw-r--r--  include/net/netfilter/nf_conntrack_helper.h | 24
-rw-r--r--  include/net/netfilter/nf_conntrack_timeout.h | 15
-rw-r--r--  include/net/netfilter/nf_nat.h | 7
-rw-r--r--  include/net/netfilter/nf_nat_masquerade.h | 19
-rw-r--r--  include/net/netfilter/nf_queue.h | 3
-rw-r--r--  include/net/netfilter/nf_tables.h | 24
-rw-r--r--  include/net/netlink.h | 372
-rw-r--r--  include/net/netns/conntrack.h | 6
-rw-r--r--  include/net/netns/ipv4.h | 2
-rw-r--r--  include/net/netns/ipv6.h | 5
-rw-r--r--  include/net/pkt_cls.h | 33
-rw-r--r--  include/net/psample.h | 1
-rw-r--r--  include/net/request_sock.h | 10
-rw-r--r--  include/net/route.h | 43
-rw-r--r--  include/net/rtnh.h (renamed from include/net/nexthop.h) | 4
-rw-r--r--  include/net/sch_generic.h | 100
-rw-r--r--  include/net/sctp/sctp.h | 2
-rw-r--r--  include/net/sctp/ulpqueue.h | 2
-rw-r--r--  include/net/sock.h | 27
-rw-r--r--  include/net/tc_act/tc_ife.h | 3
-rw-r--r--  include/net/tc_act/tc_police.h | 70
-rw-r--r--  include/net/tcp.h | 2
-rw-r--r--  include/net/tls.h | 36
-rw-r--r--  include/net/udp.h | 6
-rw-r--r--  include/net/udp_tunnel.h | 2
-rw-r--r--  include/net/vxlan.h | 2
-rw-r--r--  include/net/xfrm.h | 116
-rw-r--r--  include/trace/bpf_probe.h | 27
-rw-r--r--  include/trace/events/bpf_test_run.h | 50
-rw-r--r--  include/trace/events/fib.h | 44
-rw-r--r--  include/trace/events/fib6.h | 16
-rw-r--r--  include/trace/events/mlxsw.h | 2
-rw-r--r--  include/trace/events/nbd.h | 107
-rw-r--r--  include/trace/events/net.h | 23
-rw-r--r--  include/uapi/asm-generic/sockios.h | 4
-rw-r--r--  include/uapi/linux/batadv_packet.h | 12
-rw-r--r--  include/uapi/linux/batman_adv.h | 25
-rw-r--r--  include/uapi/linux/bpf.h | 291
-rw-r--r--  include/uapi/linux/btf.h | 32
-rw-r--r--  include/uapi/linux/ethtool.h | 11
-rw-r--r--  include/uapi/linux/fou.h | 6
-rw-r--r--  include/uapi/linux/icmpv6.h | 4
-rw-r--r--  include/uapi/linux/if_ether.h | 1
-rw-r--r--  include/uapi/linux/if_tun.h | 1
-rw-r--r--  include/uapi/linux/if_vlan.h | 9
-rw-r--r--  include/uapi/linux/ip_vs.h | 11
-rw-r--r--  include/uapi/linux/netfilter/nf_tables.h | 8
-rw-r--r--  include/uapi/linux/nl80211.h | 86
-rw-r--r--  include/uapi/linux/openvswitch.h | 46
-rw-r--r--  include/uapi/linux/pkt_sched.h | 13
-rw-r--r--  include/uapi/linux/sockios.h | 21
-rw-r--r--  include/uapi/linux/tcp.h | 27
-rw-r--r--  include/uapi/linux/tipc.h | 1
-rw-r--r--  include/uapi/linux/tipc_netlink.h | 2
-rw-r--r--  include/uapi/linux/tls.h | 15
132 files changed, 3203 insertions, 1334 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a4c644c1c091..cb3c6b3b89c8 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -17,6 +17,8 @@ struct bpf_map;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
+struct ctl_table;
+struct ctl_table_header;
#ifdef CONFIG_CGROUP_BPF
@@ -109,6 +111,12 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);
+int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
+ struct ctl_table *table, int write,
+ void __user *buf, size_t *pcount,
+ loff_t *ppos, void **new_buf,
+ enum bpf_attach_type type);
+
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
@@ -253,6 +261,18 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
\
__ret; \
})
+
+
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
+ buf, count, pos, nbuf, \
+ BPF_CGROUP_SYSCTL); \
+ __ret; \
+})
+
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -321,6 +341,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
#define for_each_cgroup_storage_type(stype) for (; false; )
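
[Editor's sketch] The BPF_CGROUP_RUN_PROG_SYSCTL() macro added above is meant to be invoked from the sysctl proc handler before a read or write is serviced. A minimal, hypothetical caller (the handler shape and variable names are assumptions for illustration, not the actual fs/proc wiring; assumes the usual <linux/slab.h> include for kfree()):

	static int example_sysctl_access(struct ctl_table_header *head,
					 struct ctl_table *table, int write,
					 void __user *buf, size_t *count,
					 loff_t *ppos)
	{
		void *new_buf = NULL;
		int ret;

		/* Runs any attached BPF_CGROUP_SYSCTL programs; a non-zero
		 * return (e.g. -EPERM) means the access was rejected.
		 */
		ret = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf,
						 count, ppos, &new_buf);
		if (ret)
			return ret;

		/* ... service the read/write; if new_buf is set, a program
		 * substituted the value being written ...
		 */
		kfree(new_buf);
		return 0;
	}
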
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 944ccc310201..59631dd0777c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -57,6 +57,12 @@ struct bpf_map_ops {
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
+
+ /* Direct value access helpers. */
+ int (*map_direct_value_addr)(const struct bpf_map *map,
+ u64 *imm, u32 off);
+ int (*map_direct_value_meta)(const struct bpf_map *map,
+ u64 imm, u32 *off);
};
struct bpf_map {
@@ -81,7 +87,8 @@ struct bpf_map {
struct btf *btf;
u32 pages;
bool unpriv_array;
- /* 51 bytes hole */
+ bool frozen; /* write-once */
+ /* 48 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
@@ -177,6 +184,7 @@ enum bpf_arg_type {
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
+ ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */
/* the following constraints used to prototype bpf_memcmp() and other
* functions that access data on eBPF program stack
@@ -195,6 +203,9 @@ enum bpf_arg_type {
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
+ ARG_PTR_TO_INT, /* pointer to int */
+ ARG_PTR_TO_LONG, /* pointer to long */
+ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
};
/* type of values returned from helper functions */
@@ -205,6 +216,7 @@ enum bpf_return_type {
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
+ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -262,6 +274,7 @@ enum bpf_reg_type {
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
+ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
};
/* The information passed from prog-specific *_is_valid_access
@@ -351,6 +364,7 @@ struct bpf_prog_aux {
u32 used_map_cnt;
u32 max_ctx_offset;
u32 max_pkt_offset;
+ u32 max_tp_access;
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
@@ -420,8 +434,38 @@ struct bpf_array {
};
};
+#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32
+#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
+ BPF_F_RDONLY_PROG | \
+ BPF_F_WRONLY | \
+ BPF_F_WRONLY_PROG)
+
+#define BPF_MAP_CAN_READ BIT(0)
+#define BPF_MAP_CAN_WRITE BIT(1)
+
+static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
+{
+ u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
+
+ /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is
+ * not possible.
+ */
+ if (access_flags & BPF_F_RDONLY_PROG)
+ return BPF_MAP_CAN_READ;
+ else if (access_flags & BPF_F_WRONLY_PROG)
+ return BPF_MAP_CAN_WRITE;
+ else
+ return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE;
+}
+
+static inline bool bpf_map_flags_access_ok(u32 access_flags)
+{
+ return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) !=
+ (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
+}
+
struct bpf_event_entry {
struct perf_event *event;
struct file *perf_file;
@@ -445,14 +489,6 @@ typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
-int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
- const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-
/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
@@ -643,6 +679,13 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
int array_map_alloc_check(union bpf_attr *attr);
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -754,6 +797,27 @@ static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
{
return ERR_PTR(-EOPNOTSUPP);
}
+
+static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
+static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
+static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -929,6 +993,8 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
extern const struct bpf_func_proto bpf_spin_lock_proto;
extern const struct bpf_func_proto bpf_spin_unlock_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;
+extern const struct bpf_func_proto bpf_strtol_proto;
+extern const struct bpf_func_proto bpf_strtoul_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 08bf2f1fe553..5a9975678d6f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -25,9 +25,11 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
@@ -59,6 +61,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#if defined(CONFIG_BPF_STREAM_PARSER)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7d8228d1c898..1305ccbd8fe6 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -207,6 +207,7 @@ struct bpf_verifier_state {
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
struct bpf_verifier_state_list *next;
+ int miss_cnt, hit_cnt;
};
/* Possible states for alu_state member. */
@@ -223,6 +224,10 @@ struct bpf_insn_aux_data {
unsigned long map_state; /* pointer/poison value for maps */
s32 call_imm; /* saved imm field of call insn */
u32 alu_limit; /* limit for add/sub register with pointer */
+ struct {
+ u32 map_index; /* index into used_maps[] */
+ u32 map_off; /* offset from value base address */
+ };
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
int sanitize_stack_off; /* stack slot to be cleared */
@@ -248,6 +253,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
return log->len_used >= log->len_total - 1;
}
+#define BPF_LOG_LEVEL1 1
+#define BPF_LOG_LEVEL2 2
+#define BPF_LOG_STATS 4
+#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
+#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS)
+
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
return log->level && log->ubuf && !bpf_verifier_log_full(log);
@@ -274,6 +285,7 @@ struct bpf_verifier_env {
bool strict_alignment; /* perform strict pointer alignment checks */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+ struct bpf_verifier_state_list *free_list;
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
@@ -283,7 +295,27 @@ struct bpf_verifier_env {
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+ struct {
+ int *insn_state;
+ int *insn_stack;
+ int cur_stack;
+ } cfg;
u32 subprog_cnt;
+ /* number of instructions analyzed by the verifier */
+ u32 insn_processed;
+ /* total verification time */
+ u64 verification_time;
+ /* maximum number of verifier states kept in 'branching' instructions */
+ u32 max_states_per_insn;
+ /* total number of allocated verifier states */
+ u32 total_states;
+ /* some states are freed during program analysis.
+ * this is peak number of states. this number dominates kernel
+ * memory consumption during verification
+ */
+ u32 peak_states;
+ /* longest register parentage chain walked for liveness marking */
+ u32 longest_mark_read_walk;
};
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 455d31b55828..64cdf2a23d42 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -51,6 +51,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
+bool btf_type_is_void(const struct btf_type *t);
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
new file mode 100644
index 000000000000..3911e0586478
--- /dev/null
+++ b/include/linux/dsa/8021q.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+ */
+
+#ifndef _NET_DSA_8021Q_H
+#define _NET_DSA_8021Q_H
+
+#include <linux/types.h>
+
+struct dsa_switch;
+struct sk_buff;
+struct net_device;
+struct packet_type;
+
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q)
+
+int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
+ bool enabled);
+
+struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
+ u16 tpid, u16 tci);
+
+struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev,
+ struct packet_type *pt, u16 *tpid, u16 *tci);
+
+u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port);
+
+u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port);
+
+int dsa_8021q_rx_switch_id(u16 vid);
+
+int dsa_8021q_rx_source_port(u16 vid);
+
+#else
+
+int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
+ bool enabled)
+{
+ return 0;
+}
+
+struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
+ u16 tpid, u16 tci)
+{
+ return NULL;
+}
+
+struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev,
+ struct packet_type *pt, u16 *tpid, u16 *tci)
+{
+ return NULL;
+}
+
+u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port)
+{
+ return 0;
+}
+
+u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
+{
+ return 0;
+}
+
+int dsa_8021q_rx_switch_id(u16 vid)
+{
+ return 0;
+}
+
+int dsa_8021q_rx_source_port(u16 vid)
+{
+ return 0;
+}
+
+#endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */
+
+#endif /* _NET_DSA_8021Q_H */
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
new file mode 100644
index 000000000000..603a02e5a8cb
--- /dev/null
+++ b/include/linux/dsa/sja1105.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+ */
+
+/* Included by drivers/net/dsa/sja1105/sja1105.h and net/dsa/tag_sja1105.c */
+
+#ifndef _NET_DSA_SJA1105_H
+#define _NET_DSA_SJA1105_H
+
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
+
+#define ETH_P_SJA1105 ETH_P_DSA_8021Q
+
+/* IEEE 802.3 Annex 57A: Slow Protocols PDUs (01:80:C2:xx:xx:xx) */
+#define SJA1105_LINKLOCAL_FILTER_A 0x0180C2000000ull
+#define SJA1105_LINKLOCAL_FILTER_A_MASK 0xFFFFFF000000ull
+/* IEEE 1588 Annex F: Transport of PTP over Ethernet (01:1B:19:xx:xx:xx) */
+#define SJA1105_LINKLOCAL_FILTER_B 0x011B19000000ull
+#define SJA1105_LINKLOCAL_FILTER_B_MASK 0xFFFFFF000000ull
+
+enum sja1105_frame_type {
+ SJA1105_FRAME_TYPE_NORMAL = 0,
+ SJA1105_FRAME_TYPE_LINK_LOCAL,
+};
+
+struct sja1105_skb_cb {
+ enum sja1105_frame_type type;
+};
+
+#define SJA1105_SKB_CB(skb) \
+ ((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb))
+
+struct sja1105_port {
+ struct dsa_port *dp;
+ int mgmt_slot;
+};
+
+#endif /* _NET_DSA_SJA1105_H */
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index aa8bfd6f738c..25c0d049336f 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -33,7 +33,7 @@ struct device;
int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
unsigned char *arch_get_platform_mac_address(void);
int nvmem_get_mac_address(struct device *dev, void *addrbuf);
-u32 eth_get_headlen(void *data, unsigned int max_len);
+u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len);
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
extern const struct header_ops eth_header_ops;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d3abde3f183..7148bab96943 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -34,6 +34,8 @@ struct bpf_prog_aux;
struct xdp_rxq_info;
struct xdp_buff;
struct sock_reuseport;
+struct ctl_table;
+struct ctl_table_header;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -1167,4 +1169,18 @@ struct bpf_sock_ops_kern {
*/
};
+struct bpf_sysctl_kern {
+ struct ctl_table_header *head;
+ struct ctl_table *table;
+ void *cur_val;
+ size_t cur_len;
+ void *new_val;
+ size_t new_len;
+ int new_updated;
+ int write;
+ loff_t *ppos;
+ /* Temporary "register" for indirect stores to ppos. */
+ u64 tmp_reg;
+};
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 83f81ac53282..6cb82301d8e9 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -233,7 +233,6 @@ const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
{ \
handler \
.cmd = op_name, \
- .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \
},
#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
@@ -290,7 +289,8 @@ static struct genl_family ZZZ_genl_family __ro_after_init = {
#ifdef GENL_MAGIC_FAMILY_HDRSZ
.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
#endif
- .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+ .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1,
+ .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy),
.ops = ZZZ_genl_ops,
.n_ops = ARRAY_SIZE(ZZZ_genl_ops),
.mcgrps = ZZZ_genl_mcgrps,
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 48703ec60d06..61f0a316c6ac 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1557,7 +1557,7 @@ struct ieee80211_vht_operation {
* struct ieee80211_he_cap_elem - HE capabilities element
*
* This structure is the "HE capabilities element" fixed fields as
- * described in P802.11ax_D3.0 section 9.4.2.237.2 and 9.4.2.237.3
+ * described in P802.11ax_D4.0 section 9.4.2.242.2 and 9.4.2.242.3
*/
struct ieee80211_he_cap_elem {
u8 mac_cap_info[6];
@@ -1619,12 +1619,12 @@ struct ieee80211_he_mcs_nss_supp {
* struct ieee80211_he_operation - HE capabilities element
*
* This structure is the "HE operation element" fields as
- * described in P802.11ax_D3.0 section 9.4.2.238
+ * described in P802.11ax_D4.0 section 9.4.2.243
*/
struct ieee80211_he_operation {
__le32 he_oper_params;
__le16 he_mcs_nss_set;
- /* Optional 0,1,3 or 4 bytes: depends on @he_oper_params */
+ /* Optional 0,1,3,4,5,7 or 8 bytes: depends on @he_oper_params */
u8 optional[0];
} __packed;
@@ -1632,7 +1632,7 @@ struct ieee80211_he_operation {
* struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field
*
* This structure is the "MU AC Parameter Record" fields as
- * described in P802.11ax_D2.0 section 9.4.2.240
+ * described in P802.11ax_D4.0 section 9.4.2.245
*/
struct ieee80211_he_mu_edca_param_ac_rec {
u8 aifsn;
@@ -1644,7 +1644,7 @@ struct ieee80211_he_mu_edca_param_ac_rec {
* struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element
*
* This structure is the "MU EDCA Parameter Set element" fields as
- * described in P802.11ax_D2.0 section 9.4.2.240
+ * described in P802.11ax_D4.0 section 9.4.2.245
*/
struct ieee80211_mu_edca_param_set {
u8 mu_qos_info;
@@ -2026,6 +2026,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
#define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x00004000
#define IEEE80211_HE_OPERATION_CO_HOSTED_BSS 0x00008000
#define IEEE80211_HE_OPERATION_ER_SU_DISABLE 0x00010000
+#define IEEE80211_HE_OPERATION_6GHZ_OP_INFO 0x00020000
#define IEEE80211_HE_OPERATION_BSS_COLOR_MASK 0x3f000000
#define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24
#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000
@@ -2056,6 +2057,8 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
oper_len += 3;
if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS)
oper_len++;
+ if (he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)
+ oper_len += 4;
/* Add the first byte (extension ID) to the total length */
oper_len++;
@@ -2487,6 +2490,7 @@ enum ieee80211_eid_ext {
WLAN_EID_EXT_HE_MU_EDCA = 38,
WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52,
WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55,
+ WLAN_EID_EXT_NON_INHERITANCE = 56,
};
/* Action category code */
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 627b788ba0ff..ef0819ced0fc 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -56,9 +56,6 @@ struct br_ip_list {
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-typedef int br_should_route_hook_t(struct sk_buff *skb);
-extern br_should_route_hook_t __rcu *br_should_route_hook;
-
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index a64f21a97369..367dc2a0f84a 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -237,6 +237,20 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev)
return rtnl_dereference(dev->ip_ptr);
}
+/* called with rcu_read_lock or rtnl held */
+static inline bool ip_ignore_linkdown(const struct net_device *dev)
+{
+ struct in_device *in_dev;
+ bool rc = false;
+
+ in_dev = rcu_dereference_rtnl(dev->ip_ptr);
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+ rc = true;
+
+ return rc;
+}
+
static inline struct neigh_parms *__in_dev_arp_parms_get_rcu(const struct net_device *dev)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index fa928242567d..1b6d31da7cbc 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -297,6 +297,7 @@ static inline u64 jiffies_to_nsecs(const unsigned long j)
}
extern u64 jiffies64_to_nsecs(u64 j);
+extern u64 jiffies64_to_msecs(u64 j);
extern unsigned long __msecs_to_jiffies(const unsigned int m);
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 73d04743a2bb..af6b11d4d673 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -34,5 +34,6 @@
/* struct phy_device dev_flags definitions */
#define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001
#define MARVELL_PHY_M1118_DNS323_LEDS 0x00000002
+#define MARVELL_PHY_LED0_LINK_LED1_ACTIVE 0x00000004
#endif /* _MARVELL_PHY_H */
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 3e99ae3ed87f..9dc16d5705a1 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -39,7 +39,8 @@ struct mdio_device {
/* Bus address of the MDIO device (0-31) */
int addr;
int flags;
- struct gpio_desc *reset;
+ struct gpio_desc *reset_gpio;
+ struct reset_control *reset_ctrl;
unsigned int reset_assert_delay;
unsigned int reset_deassert_delay;
};
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 612c8c2f2466..769326ea1d9b 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -170,7 +170,7 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
doorbell[1] = cpu_to_be32(cq->cqn);
- mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
+ mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL);
}
static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f93a5598b942..fc2b6e807f06 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -361,6 +361,7 @@ enum {
enum {
MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1,
+ MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5,
};
enum {
@@ -1001,7 +1002,8 @@ enum {
MLX5_MATCH_OUTER_HEADERS = 1 << 0,
MLX5_MATCH_MISC_PARAMETERS = 1 << 1,
MLX5_MATCH_INNER_HEADERS = 1 << 2,
-
+ MLX5_MATCH_MISC_PARAMETERS_2 = 1 << 3,
+ MLX5_MATCH_MISC_PARAMETERS_3 = 1 << 4,
};
enum {
@@ -1045,6 +1047,7 @@ enum mlx5_mpls_supported_fields {
};
enum mlx5_flex_parser_protos {
+ MLX5_FLEX_PROTO_GENEVE = 1 << 3,
MLX5_FLEX_PROTO_CW_MPLS_GRE = 1 << 4,
MLX5_FLEX_PROTO_CW_MPLS_UDP = 1 << 5,
};
@@ -1166,6 +1169,12 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \
MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap)
+#define MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) \
+ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_rdma.cap)
+
+#define MLX5_CAP_FLOWTABLE_RDMA_RX_MAX(mdev, cap) \
+ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_rdma.cap)
+
#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
MLX5_GET(flow_table_eswitch_cap, \
mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h
index 0787de28f2fc..5c267707e1df 100644
--- a/include/linux/mlx5/doorbell.h
+++ b/include/linux/mlx5/doorbell.h
@@ -36,46 +36,25 @@
#define MLX5_BF_OFFSET 0x800
#define MLX5_CQ_DOORBELL 0x20
-#if BITS_PER_LONG == 64
/* Assume that we can just write a 64-bit doorbell atomically. s390
* actually doesn't have writeq() but S/390 systems don't even have
* PCI so we won't worry about it.
+ *
+ * Note that the write is not atomic on 32-bit systems! In contrast to 64-bit
+ * ones, it requires proper locking. mlx5_write64 doesn't do any locking, so use
+ * it at your own discretion, protected by some kind of lock on 32 bits.
+ *
+ * TODO: use write{q,l}_relaxed()
*/
-#define MLX5_DECLARE_DOORBELL_LOCK(name)
-#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0)
-#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL)
-
-static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
+static inline void mlx5_write64(__be32 val[2], void __iomem *dest)
{
+#if BITS_PER_LONG == 64
__raw_writeq(*(u64 *)val, dest);
-}
-
#else
-
-/* Just fall back to a spinlock to protect the doorbell if
- * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
- * MMIO writes.
- */
-
-#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
-#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
-#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr)
-
-static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
-{
- unsigned long flags;
-
- if (doorbell_lock)
- spin_lock_irqsave(doorbell_lock, flags);
__raw_writel((__force u32) val[0], dest);
__raw_writel((__force u32) val[1], dest + 4);
- if (doorbell_lock)
- spin_unlock_irqrestore(doorbell_lock, flags);
-}
-
#endif
+}
#endif /* MLX5_DOORBELL_H */
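
[Editor's sketch] With the spinlock argument removed from mlx5_write64(), 32-bit callers must now provide their own serialization, as the new comment says. A hedged illustration of what a caller could do (the context struct, lock and helper name are assumptions; on 64-bit builds the lock is not strictly required because the doorbell is a single 64-bit store):

	#include <linux/spinlock.h>
	#include <linux/mlx5/cq.h>
	#include <linux/mlx5/doorbell.h>

	struct example_cq {
		struct mlx5_core_cq cq;
		void __iomem *uar_page;
		spinlock_t db_lock;	/* serializes the two 32-bit stores on 32-bit builds */
	};

	static void example_arm_cq(struct example_cq *ecq, u32 cmd, u32 ci, u32 sn)
	{
		__be32 doorbell[2];
		unsigned long flags;

		doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
		doorbell[1] = cpu_to_be32(ecq->cq.cqn);

		spin_lock_irqsave(&ecq->db_lock, flags);
		mlx5_write64(doorbell, ecq->uar_page + MLX5_CQ_DOORBELL);
		spin_unlock_irqrestore(&ecq->db_lock, flags);
	}
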
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0d0729648844..5a39b323c52e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -56,7 +56,6 @@
enum {
MLX5_BOARD_ID_LEN = 64,
- MLX5_MAX_NAME_LEN = 16,
};
enum {
@@ -133,6 +132,7 @@ enum {
MLX5_REG_MTRC_CONF = 0x9041,
MLX5_REG_MTRC_STDB = 0x9042,
MLX5_REG_MTRC_CTRL = 0x9043,
+ MLX5_REG_MPEIN = 0x9050,
MLX5_REG_MPCNT = 0x9051,
MLX5_REG_MTPPS = 0x9053,
MLX5_REG_MTPPSE = 0x9054,
@@ -512,8 +512,13 @@ struct mlx5_rl_table {
struct mlx5_rl_entry *rl_entry;
};
+struct mlx5_core_roce {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_handle *allow_rule;
+};
+
struct mlx5_priv {
- char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table *eq_table;
/* pages stuff */
@@ -566,6 +571,7 @@ struct mlx5_priv {
struct mlx5_lag *lag;
struct mlx5_devcom *devcom;
unsigned long pci_dev_data;
+ struct mlx5_core_roce roce;
struct mlx5_fc_stats fc_stats;
struct mlx5_rl_table rl_table;
@@ -642,6 +648,7 @@ struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_core_dev {
+ struct device *device;
struct pci_dev *pdev;
/* sync pci state */
struct mutex pci_status_mutex;
@@ -662,6 +669,7 @@ struct mlx5_core_dev {
u64 sys_image_guid;
phys_addr_t iseg_base;
struct mlx5_init_seg __iomem *iseg;
+ phys_addr_t bar_addr;
enum mlx5_device_state state;
/* sync interface state */
struct mutex intf_state_mutex;
@@ -887,6 +895,7 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
+void mlx5_health_flush(struct mlx5_core_dev *dev);
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 96d8435421de..0ca77dd1429c 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -35,7 +35,7 @@ struct mlx5_eswitch_rep_if {
void (*unload)(struct mlx5_eswitch_rep *rep);
void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
void *priv;
- u8 state;
+ atomic_t state;
};
struct mlx5_eswitch_rep {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 9df51da04621..e690ba0f965c 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -73,6 +73,13 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_SNIFFER_RX,
MLX5_FLOW_NAMESPACE_SNIFFER_TX,
MLX5_FLOW_NAMESPACE_EGRESS,
+ MLX5_FLOW_NAMESPACE_RDMA_RX,
+};
+
+enum {
+ FDB_BYPASS_PATH,
+ FDB_FAST_PATH,
+ FDB_SLOW_PATH,
};
struct mlx5_flow_table;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3b83288749c6..82612741b29e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -81,6 +81,19 @@ enum {
};
enum {
+ MLX5_OBJ_TYPE_SW_ICM = 0x0008,
+};
+
+enum {
+ MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM),
+ MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11),
+};
+
+enum {
+ MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b,
+};
+
+enum {
MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
MLX5_CMD_OP_QUERY_ADAPTER = 0x101,
MLX5_CMD_OP_INIT_HCA = 0x102,
@@ -299,7 +312,11 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
u8 outer_gre_protocol[0x1];
u8 outer_gre_key[0x1];
u8 outer_vxlan_vni[0x1];
- u8 reserved_at_1a[0x5];
+ u8 outer_geneve_vni[0x1];
+ u8 outer_geneve_oam[0x1];
+ u8 outer_geneve_protocol_type[0x1];
+ u8 outer_geneve_opt_len[0x1];
+ u8 reserved_at_1e[0x1];
u8 source_eswitch_port[0x1];
u8 inner_dmac[0x1];
@@ -327,7 +344,8 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
u8 inner_tcp_flags[0x1];
u8 reserved_at_37[0x9];
- u8 reserved_at_40[0x5];
+ u8 geneve_tlv_option_0_data[0x1];
+ u8 reserved_at_41[0x4];
u8 outer_first_mpls_over_udp[0x4];
u8 outer_first_mpls_over_gre[0x4];
u8 inner_first_mpls[0x4];
@@ -357,11 +375,14 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 pop_vlan_2[0x1];
u8 push_vlan_2[0x1];
u8 reformat_and_vlan_action[0x1];
- u8 reserved_at_10[0x2];
+ u8 reserved_at_10[0x1];
+ u8 sw_owner[0x1];
u8 reformat_l3_tunnel_to_l2[0x1];
u8 reformat_l2_to_l3_tunnel[0x1];
u8 reformat_and_modify_action[0x1];
- u8 reserved_at_15[0xb];
+ u8 reserved_at_15[0x2];
+ u8 table_miss_action_domain[0x1];
+ u8 reserved_at_18[0x8];
u8 reserved_at_20[0x2];
u8 log_max_ft_size[0x6];
u8 log_max_modify_header_context[0x8];
@@ -469,7 +490,9 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 vxlan_vni[0x18];
u8 reserved_at_b8[0x8];
- u8 reserved_at_c0[0x20];
+ u8 geneve_vni[0x18];
+ u8 reserved_at_d8[0x7];
+ u8 geneve_oam[0x1];
u8 reserved_at_e0[0xc];
u8 outer_ipv6_flow_label[0x14];
@@ -477,7 +500,11 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 reserved_at_100[0xc];
u8 inner_ipv6_flow_label[0x14];
- u8 reserved_at_120[0x28];
+ u8 reserved_at_120[0xa];
+ u8 geneve_opt_len[0x6];
+ u8 geneve_protocol_type[0x10];
+
+ u8 reserved_at_140[0x8];
u8 bth_dst_qp[0x18];
u8 reserved_at_160[0x20];
u8 outer_esp_spi[0x20];
@@ -507,6 +534,12 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
u8 reserved_at_1a0[0x60];
};
+struct mlx5_ifc_fte_match_set_misc3_bits {
+ u8 reserved_at_0[0x120];
+ u8 geneve_tlv_option_0_data[0x20];
+ u8 reserved_at_140[0xc0];
+};
+
struct mlx5_ifc_cmd_pas_bits {
u8 pa_h[0x20];
@@ -589,7 +622,7 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
- u8 reserved_at_400[0x200];
+ struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_rdma;
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer;
@@ -770,7 +803,19 @@ struct mlx5_ifc_device_mem_cap_bits {
u8 max_memic_size[0x20];
- u8 reserved_at_c0[0x740];
+ u8 steering_sw_icm_start_address[0x40];
+
+ u8 reserved_at_100[0x8];
+ u8 log_header_modify_sw_icm_size[0x8];
+ u8 reserved_at_110[0x2];
+ u8 log_sw_icm_alloc_granularity[0x6];
+ u8 log_steering_sw_icm_size[0x8];
+
+ u8 reserved_at_120[0x20];
+
+ u8 header_modify_sw_icm_start_address[0x40];
+
+ u8 reserved_at_180[0x680];
};
enum {
@@ -919,6 +964,7 @@ enum {
enum {
MLX5_UCTX_CAP_RAW_TX = 1UL << 0,
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
};
struct mlx5_ifc_cmd_hca_cap_bits {
@@ -929,7 +975,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_srq_sz[0x8];
u8 log_max_qp_sz[0x8];
- u8 reserved_at_90[0xb];
+ u8 reserved_at_90[0x8];
+ u8 prio_tag_required[0x1];
+ u8 reserved_at_99[0x2];
u8 log_max_qp[0x5];
u8 reserved_at_a0[0xb];
@@ -1211,7 +1259,11 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 num_of_uars_per_page[0x20];
u8 flex_parser_protocols[0x20];
- u8 reserved_at_560[0x20];
+
+ u8 max_geneve_tlv_options[0x8];
+ u8 reserved_at_568[0x3];
+ u8 max_geneve_tlv_option_data_len[0x5];
+ u8 reserved_at_570[0x10];
u8 reserved_at_580[0x3c];
u8 mini_cqe_resp_stride_index[0x1];
@@ -1247,7 +1299,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 uctx_cap[0x20];
- u8 reserved_at_6c0[0x140];
+ u8 reserved_at_6c0[0x4];
+ u8 flex_parser_id_geneve_tlv_option_0[0x4];
+ u8 reserved_at_6c8[0x138];
};
enum mlx5_flow_destination_type {
@@ -1260,6 +1314,12 @@ enum mlx5_flow_destination_type {
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101,
};
+enum mlx5_flow_table_miss_action {
+ MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+ MLX5_FLOW_TABLE_MISS_ACTION_FWD,
+ MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
+};
+
struct mlx5_ifc_dest_format_struct_bits {
u8 destination_type[0x8];
u8 destination_id[0x18];
@@ -1299,7 +1359,9 @@ struct mlx5_ifc_fte_match_param_bits {
struct mlx5_ifc_fte_match_set_misc2_bits misc_parameters_2;
- u8 reserved_at_800[0x800];
+ struct mlx5_ifc_fte_match_set_misc3_bits misc_parameters_3;
+
+ u8 reserved_at_a00[0x600];
};
enum {
@@ -2920,6 +2982,7 @@ enum {
MLX5_MKC_ACCESS_MODE_MTT = 0x1,
MLX5_MKC_ACCESS_MODE_KLMS = 0x2,
MLX5_MKC_ACCESS_MODE_KSM = 0x3,
+ MLX5_MKC_ACCESS_MODE_SW_ICM = 0x4,
MLX5_MKC_ACCESS_MODE_MEMIC = 0x5,
};
@@ -4807,6 +4870,7 @@ enum {
MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1,
MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2,
MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3,
+ MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4,
};
struct mlx5_ifc_query_flow_group_out_bits {
@@ -5110,6 +5174,7 @@ enum {
MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0 = 0x14,
MLX5_ACTION_IN_FIELD_OUT_SIPV4 = 0x15,
MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16,
+ MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17,
MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47,
};
@@ -6874,14 +6939,14 @@ struct mlx5_ifc_create_tis_in_bits {
struct mlx5_ifc_create_tir_out_bits {
u8 status[0x8];
- u8 reserved_at_8[0x18];
+ u8 icm_address_63_40[0x18];
u8 syndrome[0x20];
- u8 reserved_at_40[0x8];
+ u8 icm_address_39_32[0x8];
u8 tirn[0x18];
- u8 reserved_at_60[0x20];
+ u8 icm_address_31_0[0x20];
};
struct mlx5_ifc_create_tir_in_bits {
@@ -8026,6 +8091,52 @@ struct mlx5_ifc_ppcnt_reg_bits {
union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
};
+struct mlx5_ifc_mpein_reg_bits {
+ u8 reserved_at_0[0x2];
+ u8 depth[0x6];
+ u8 pcie_index[0x8];
+ u8 node[0x8];
+ u8 reserved_at_18[0x8];
+
+ u8 capability_mask[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 link_width_enabled[0x8];
+ u8 link_speed_enabled[0x10];
+
+ u8 lane0_physical_position[0x8];
+ u8 link_width_active[0x8];
+ u8 link_speed_active[0x10];
+
+ u8 num_of_pfs[0x10];
+ u8 num_of_vfs[0x10];
+
+ u8 bdf0[0x10];
+ u8 reserved_at_b0[0x10];
+
+ u8 max_read_request_size[0x4];
+ u8 max_payload_size[0x4];
+ u8 reserved_at_c8[0x5];
+ u8 pwr_status[0x3];
+ u8 port_type[0x4];
+ u8 reserved_at_d4[0xb];
+ u8 lane_reversal[0x1];
+
+ u8 reserved_at_e0[0x14];
+ u8 pci_power[0xc];
+
+ u8 reserved_at_100[0x20];
+
+ u8 device_status[0x10];
+ u8 port_state[0x8];
+ u8 reserved_at_138[0x8];
+
+ u8 reserved_at_140[0x10];
+ u8 receiver_detect_result[0x10];
+
+ u8 reserved_at_160[0x20];
+};
+
struct mlx5_ifc_mpcnt_reg_bits {
u8 reserved_at_0[0x8];
u8 pcie_index[0x8];
@@ -8345,7 +8456,9 @@ struct mlx5_ifc_pcam_reg_bits {
};
struct mlx5_ifc_mcam_enhanced_features_bits {
- u8 reserved_at_0[0x74];
+ u8 reserved_at_0[0x6e];
+ u8 pci_status_and_power[0x1];
+ u8 reserved_at_6f[0x5];
u8 mark_tx_action_cnp[0x1];
u8 mark_tx_action_cqe[0x1];
u8 dynamic_tx_overflow[0x1];
@@ -8953,6 +9066,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
struct mlx5_ifc_ppad_reg_bits ppad_reg;
struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
+ struct mlx5_ifc_mpein_reg_bits mpein_reg;
struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
struct mlx5_ifc_pplm_reg_bits pplm_reg;
struct mlx5_ifc_pplr_reg_bits pplr_reg;
@@ -9442,6 +9556,33 @@ struct mlx5_ifc_uctx_bits {
u8 reserved_at_20[0x160];
};
+struct mlx5_ifc_sw_icm_bits {
+ u8 modify_field_select[0x40];
+
+ u8 reserved_at_40[0x18];
+ u8 log_sw_icm_size[0x8];
+
+ u8 reserved_at_60[0x20];
+
+ u8 sw_icm_start_addr[0x40];
+
+ u8 reserved_at_c0[0x140];
+};
+
+struct mlx5_ifc_geneve_tlv_option_bits {
+ u8 modify_field_select[0x40];
+
+ u8 reserved_at_40[0x18];
+ u8 geneve_option_fte_index[0x8];
+
+ u8 option_class[0x10];
+ u8 option_type[0x8];
+ u8 reserved_at_78[0x3];
+ u8 option_data_length[0x5];
+
+ u8 reserved_at_80[0x180];
+};
+
struct mlx5_ifc_create_umem_in_bits {
u8 opcode[0x10];
u8 uid[0x10];
@@ -9479,6 +9620,16 @@ struct mlx5_ifc_destroy_uctx_in_bits {
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_create_sw_icm_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+ struct mlx5_ifc_sw_icm_bits sw_icm;
+};
+
+struct mlx5_ifc_create_geneve_tlv_option_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+ struct mlx5_ifc_geneve_tlv_option_bits geneve_tlv_opt;
+};
+
struct mlx5_ifc_mtrc_string_db_param_bits {
u8 string_db_base_address[0x20];
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 64e78394fc9c..de9a272c9f3d 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -60,6 +60,7 @@ enum mlx5_an_status {
#define MLX5_I2C_ADDR_LOW 0x50
#define MLX5_I2C_ADDR_HIGH 0x51
#define MLX5_EEPROM_PAGE_LENGTH 256
+#define MLX5_EEPROM_HIGH_PAGE_LENGTH 128
enum mlx5e_link_mode {
MLX5E_1000BASE_CX_SGMII = 0,
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 0343c81d4c5f..3ba4edbd17a6 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -395,6 +395,7 @@ struct mlx5_wqe_signature_seg {
struct mlx5_wqe_inline_seg {
__be32 byte_count;
+ __be32 data[0];
};
enum mlx5_sig_type {
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index a261d5528ff7..dc6b1e7cb8c4 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -50,6 +50,9 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state);
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn);
+int mlx5_core_create_tir_out(struct mlx5_core_dev *dev,
+ u32 *in, int inlen,
+ u32 *out, int outlen);
int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
int inlen);
void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 0eef548b9946..3d1c6cdbbba7 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -118,10 +118,6 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
int promisc_uc,
int promisc_mc,
int promisc_all);
-int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
- u16 vport,
- u16 vlans[],
- int *size);
int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
u16 vlans[],
int list_size);
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index 97ca105347a6..5685805533b5 100644
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -111,6 +111,18 @@ struct sdio_driver {
extern int sdio_register_driver(struct sdio_driver *);
extern void sdio_unregister_driver(struct sdio_driver *);
+/**
+ * module_sdio_driver() - Helper macro for registering a SDIO driver
+ * @__sdio_driver: sdio_driver struct
+ *
+ * Helper macro for SDIO drivers which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit()
+ */
+#define module_sdio_driver(__sdio_driver) \
+ module_driver(__sdio_driver, sdio_register_driver, \
+ sdio_unregister_driver)
+
/*
* SDIO I/O operations
*/
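
[Editor's sketch] A usage sketch of the new module_sdio_driver() helper; the driver name, device ID and empty callbacks below are made-up placeholders:

	#include <linux/module.h>
	#include <linux/mmc/sdio_func.h>
	#include <linux/mmc/sdio_ids.h>

	static int example_probe(struct sdio_func *func,
				 const struct sdio_device_id *id)
	{
		return 0;
	}

	static void example_remove(struct sdio_func *func)
	{
	}

	static const struct sdio_device_id example_ids[] = {
		{ SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, 0x7663) },	/* hypothetical device */
		{ }
	};
	MODULE_DEVICE_TABLE(sdio, example_ids);

	static struct sdio_driver example_driver = {
		.name		= "example_sdio",
		.id_table	= example_ids,
		.probe		= example_probe,
		.remove		= example_remove,
	};

	/* Replaces the module_init()/module_exit() boilerplate: */
	module_sdio_driver(example_driver);
	MODULE_LICENSE("GPL");
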
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 4332199c71c2..d1a5d5df02f5 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -59,6 +59,8 @@
#define SDIO_DEVICE_ID_MARVELL_8797_F0 0x9128
#define SDIO_DEVICE_ID_MARVELL_8887WLAN 0x9134
+#define SDIO_VENDOR_ID_MEDIATEK 0x037a
+
#define SDIO_VENDOR_ID_SIANO 0x039a
#define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201
#define SDIO_DEVICE_ID_SIANO_NICE 0x0202
diff --git a/include/linux/net.h b/include/linux/net.h
index c606c72311d0..50bf5206ead6 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -161,6 +161,8 @@ struct proto_ops {
int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
unsigned long arg);
#endif
+ int (*gettstamp) (struct socket *sock, void __user *userstamp,
+ bool timeval, bool time32);
int (*listen) (struct socket *sock, int len);
int (*shutdown) (struct socket *sock, int flags);
int (*setsockopt)(struct socket *sock, int level,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 324e872c91d1..44b47e9df94a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -194,8 +194,8 @@ struct net_device_stats {
#ifdef CONFIG_RPS
#include <linux/static_key.h>
-extern struct static_key rps_needed;
-extern struct static_key rfs_needed;
+extern struct static_key_false rps_needed;
+extern struct static_key_false rfs_needed;
#endif
struct neighbour;
@@ -914,34 +914,13 @@ struct xfrmdev_ops {
};
#endif
-#if IS_ENABLED(CONFIG_TLS_DEVICE)
-enum tls_offload_ctx_dir {
- TLS_OFFLOAD_CTX_DIR_RX,
- TLS_OFFLOAD_CTX_DIR_TX,
-};
-
-struct tls_crypto_info;
-struct tls_context;
-
-struct tlsdev_ops {
- int (*tls_dev_add)(struct net_device *netdev, struct sock *sk,
- enum tls_offload_ctx_dir direction,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn);
- void (*tls_dev_del)(struct net_device *netdev,
- struct tls_context *ctx,
- enum tls_offload_ctx_dir direction);
- void (*tls_dev_resync_rx)(struct net_device *netdev,
- struct sock *sk, u32 seq, u64 rcd_sn);
-};
-#endif
-
struct dev_ifalias {
struct rcu_head rcuhead;
char ifalias[];
};
struct devlink;
+struct tlsdev_ops;
/*
* This structure defines the management hooks for network devices.
@@ -986,8 +965,7 @@ struct devlink;
* those the driver believes to be appropriate.
*
* u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
- * struct net_device *sb_dev,
- * select_queue_fallback_t fallback);
+ * struct net_device *sb_dev);
* Called to decide which queue to use when device supports multiple
* transmit queues.
*
@@ -1251,8 +1229,8 @@ struct devlink;
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
- * struct devlink *(*ndo_get_devlink)(struct net_device *dev);
- * Get devlink instance associated with a given netdev.
+ * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev);
+ * Get devlink port instance associated with a given netdev.
* Called with a reference on the netdevice and devlink locks only,
* rtnl_lock is not held.
*/
@@ -1268,8 +1246,7 @@ struct net_device_ops {
netdev_features_t features);
u16 (*ndo_select_queue)(struct net_device *dev,
struct sk_buff *skb,
- struct net_device *sb_dev,
- select_queue_fallback_t fallback);
+ struct net_device *sb_dev);
void (*ndo_change_rx_flags)(struct net_device *dev,
int flags);
void (*ndo_set_rx_mode)(struct net_device *dev);
@@ -1453,7 +1430,7 @@ struct net_device_ops {
u32 flags);
int (*ndo_xsk_async_xmit)(struct net_device *dev,
u32 queue_id);
- struct devlink * (*ndo_get_devlink)(struct net_device *dev);
+ struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
};
/**
@@ -2155,9 +2132,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
&qdisc_xmit_lock_key); \
}
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
- struct sk_buff *skb,
- struct net_device *sb_dev);
+u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
+ struct sk_buff *skb,
+ struct net_device *sb_dev);
/* returns the headroom that the master device needs to take in account
* when forwarding to this dev
@@ -2642,11 +2621,9 @@ void dev_close_many(struct list_head *head, bool unlink);
void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev,
- select_queue_fallback_t fallback);
+ struct net_device *sb_dev);
u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev,
- select_queue_fallback_t fallback);
+ struct net_device *sb_dev);
int dev_queue_xmit(struct sk_buff *skb);
int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
@@ -2664,14 +2641,6 @@ void netdev_freemem(struct net_device *dev);
void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev);
-DECLARE_PER_CPU(int, xmit_recursion);
-#define XMIT_RECURSION_LIMIT 10
-
-static inline int dev_recursion_level(void)
-{
- return this_cpu_read(xmit_recursion);
-}
-
struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
@@ -3020,6 +2989,11 @@ struct softnet_data {
#ifdef CONFIG_XFRM_OFFLOAD
struct sk_buff_head xfrm_backlog;
#endif
+ /* written and read only by owning cpu: */
+ struct {
+ u16 recursion;
+ u8 more;
+ } xmit;
#ifdef CONFIG_RPS
/* input_queue_head should be written by cpu owning this struct,
* and only read by other cpus. Worth using a cache line.
@@ -3055,6 +3029,28 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+static inline int dev_recursion_level(void)
+{
+ return this_cpu_read(softnet_data.xmit.recursion);
+}
+
+#define XMIT_RECURSION_LIMIT 10
+static inline bool dev_xmit_recursion(void)
+{
+ return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
+ XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+ __this_cpu_inc(softnet_data.xmit.recursion);
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+ __this_cpu_dec(softnet_data.xmit.recursion);
+}
+
void __netif_schedule(struct Qdisc *q);
void netif_schedule_queue(struct netdev_queue *txq);
@@ -4410,10 +4406,15 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
struct sk_buff *skb, struct net_device *dev,
bool more)
{
- skb->xmit_more = more ? 1 : 0;
+ __this_cpu_write(softnet_data.xmit.more, more);
return ops->ndo_start_xmit(skb, dev);
}
+static inline bool netdev_xmit_more(void)
+{
+ return __this_cpu_read(softnet_data.xmit.more);
+}
+
static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, bool more)
{
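
The hunks above replace the skb->xmit_more bit with the per-cpu softnet_data.xmit.more flag and move the transmit recursion counter into the same per-cpu struct. A minimal, hedged sketch of how callers adapt; example_xmit(), example_ring_kick() and example_guarded_xmit() are hypothetical names for illustration, not drivers touched by this series:

/* Hypothetical driver: reads the "more packets queued" hint via the new
 * per-cpu helper instead of skb->xmit_more.
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	/* ... place skb on the hardware descriptor ring (omitted) ... */

	/* Before this change: if (!skb->xmit_more || netif_xmit_stopped(txq)) */
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		example_ring_kick(dev);		/* hypothetical doorbell write */

	return NETDEV_TX_OK;
}

/* Core-style recursion guard using the relocated per-cpu counter. */
static int example_guarded_xmit(struct sk_buff *skb)
{
	int rc = -ENOBUFS;

	if (dev_xmit_recursion())		/* nested too deeply, bail out; caller frees skb */
		return rc;

	dev_xmit_recursion_inc();
	rc = dev_queue_xmit(skb);
	dev_xmit_recursion_dec();
	return rc;
}
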
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 72cb19c3db6a..996bc247ef6e 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -24,20 +24,36 @@ static inline int NF_DROP_GETERR(int verdict)
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+ const unsigned long *ul1 = (const unsigned long *)a1;
+ const unsigned long *ul2 = (const unsigned long *)a2;
+
+ return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL;
+#else
return a1->all[0] == a2->all[0] &&
a1->all[1] == a2->all[1] &&
a1->all[2] == a2->all[2] &&
a1->all[3] == a2->all[3];
+#endif
}
static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
union nf_inet_addr *result,
const union nf_inet_addr *mask)
{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+ const unsigned long *ua = (const unsigned long *)a1;
+ unsigned long *ur = (unsigned long *)result;
+ const unsigned long *um = (const unsigned long *)mask;
+
+ ur[0] = ua[0] & um[0];
+ ur[1] = ua[1] & um[1];
+#else
result->all[0] = a1->all[0] & mask->all[0];
result->all[1] = a1->all[1] & mask->all[1];
result->all[2] = a1->all[2] & mask->all[2];
result->all[3] = a1->all[3] & mask->all[3];
+#endif
}
int netfilter_init(void);
@@ -360,7 +376,7 @@ extern struct nf_nat_hook __rcu *nf_nat_hook;
static inline void
nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
{
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
struct nf_nat_hook *nat_hook;
rcu_read_lock();
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index f2e1e6b13ca4..e499d170f12d 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -401,33 +401,30 @@ ip_set_get_h16(const struct nlattr *attr)
return ntohs(nla_get_be16(attr));
}
-#define ipset_nest_start(skb, attr) nla_nest_start(skb, attr | NLA_F_NESTED)
-#define ipset_nest_end(skb, start) nla_nest_end(skb, start)
-
static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr)
{
- struct nlattr *__nested = ipset_nest_start(skb, type);
+ struct nlattr *__nested = nla_nest_start(skb, type);
int ret;
if (!__nested)
return -EMSGSIZE;
ret = nla_put_in_addr(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr);
if (!ret)
- ipset_nest_end(skb, __nested);
+ nla_nest_end(skb, __nested);
return ret;
}
static inline int nla_put_ipaddr6(struct sk_buff *skb, int type,
const struct in6_addr *ipaddrptr)
{
- struct nlattr *__nested = ipset_nest_start(skb, type);
+ struct nlattr *__nested = nla_nest_start(skb, type);
int ret;
if (!__nested)
return -EMSGSIZE;
ret = nla_put_in6_addr(skb, IPSET_ATTR_IPADDR_IPV6, ipaddrptr);
if (!ret)
- ipset_nest_end(skb, __nested);
+ nla_nest_end(skb, __nested);
return ret;
}
diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
index c6000046c966..788613f36935 100644
--- a/include/linux/netfilter/nfnetlink_osf.h
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -21,13 +21,18 @@ struct nf_osf_finger {
struct nf_osf_user_finger finger;
};
+struct nf_osf_data {
+ const char *genre;
+ const char *version;
+};
+
bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
int hooknum, struct net_device *in, struct net_device *out,
const struct nf_osf_info *info, struct net *net,
const struct list_head *nf_osf_fingers);
-const char *nf_osf_find(const struct sk_buff *skb,
- const struct list_head *nf_osf_fingers,
- const int ttl_check);
+bool nf_osf_find(const struct sk_buff *skb,
+ const struct list_head *nf_osf_fingers,
+ const int ttl_check, struct nf_osf_data *data);
#endif /* _NFOSF_H */
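
The nf_osf_find() prototype above now returns a bool and fills in the new struct nf_osf_data instead of returning only the genre string. A hedged sketch of a caller adapting to the new signature; example_osf_lookup() and its arguments are placeholders, not code from this series:

static void example_osf_lookup(const struct sk_buff *skb,
			       const struct list_head *fingers, int ttl_check)
{
	struct nf_osf_data data;

	if (nf_osf_find(skb, fingers, ttl_check, &data))
		pr_debug("OS genre %s version %s\n", data.genre, data.version);
}
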
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index bf384b3eedb8..1f852ef7b098 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -317,7 +317,6 @@ struct xt_table_info *xt_replace_table(struct xt_table *table,
int *error);
struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
-struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision);
struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
int xt_find_revision(u8 af, const char *name, u8 revision, int target,
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 471e9467105b..12113e502656 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -87,6 +87,21 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
}
int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
+
+static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
+{
+#if IS_MODULE(CONFIG_IPV6)
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return -EHOSTUNREACH;
+
+ return v6_ops->route_me_harder(net, skb);
+#else
+ return ip6_route_me_harder(net, skb);
+#endif
+}
+
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
diff --git a/include/linux/packing.h b/include/linux/packing.h
new file mode 100644
index 000000000000..54667735cc67
--- /dev/null
+++ b/include/linux/packing.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2016-2018, NXP Semiconductors
+ * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
+ */
+#ifndef _LINUX_PACKING_H
+#define _LINUX_PACKING_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#define QUIRK_MSB_ON_THE_RIGHT BIT(0)
+#define QUIRK_LITTLE_ENDIAN BIT(1)
+#define QUIRK_LSW32_IS_FIRST BIT(2)
+
+enum packing_op {
+ PACK,
+ UNPACK,
+};
+
+/**
+ * packing - Convert numbers (currently u64) between a packed and an unpacked
+ * format. Unpacked means laid out in memory in the CPU's native
+ * understanding of integers, while packed means anything else that
+ * requires translation.
+ *
+ * @pbuf: Pointer to a buffer holding the packed value.
+ * @uval: Pointer to an u64 holding the unpacked value.
+ * @startbit: The index (in logical notation, compensated for quirks) where
+ * the packed value starts within pbuf. Must be larger than, or
+ * equal to, endbit.
+ * @endbit: The index (in logical notation, compensated for quirks) where
+ * the packed value ends within pbuf. Must be smaller than, or equal
+ * to, startbit.
+ * @op: If PACK, then uval will be treated as const pointer and copied (packed)
+ * into pbuf, between startbit and endbit.
+ * If UNPACK, then pbuf will be treated as const pointer and the logical
+ * value between startbit and endbit will be copied (unpacked) to uval.
+ * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and
+ * QUIRK_MSB_ON_THE_RIGHT.
+ *
+ * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming
+ * correct usage, return code may be discarded.
+ * If op is PACK, pbuf is modified.
+ * If op is UNPACK, uval is modified.
+ */
+int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
+ enum packing_op op, u8 quirks);
+
+#endif
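
The kernel-doc above defines the single entry point of the new packing API. A minimal usage sketch; the bit offsets are illustrative assumptions, not values from any driver in this series:

static int packing_example(void)
{
	u8 buf[8] = {};
	u64 val = 0xabc, readback = 0;
	int err;

	/* Pack the 12-bit value into bits 47..36 of the 8-byte buffer. */
	err = packing(buf, &val, 47, 36, sizeof(buf), PACK, 0);
	if (err)
		return err;	/* -EINVAL/-ERANGE on inconsistent bit offsets */

	/* Unpack the same field; readback should now equal val. */
	return packing(buf, &readback, 47, 36, sizeof(buf), UNPACK, 0);
}
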
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 34084892a466..073fb151b5a9 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -308,13 +308,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
*
* HALTED: PHY is up, but no polling or interrupts are done. Or
* PHY is in an error state.
- *
- * - phy_start moves to RESUMING
- *
- * RESUMING: PHY was halted, but now wants to run again.
- * - If we are forcing, or aneg is done, timer moves to RUNNING
- * - If aneg is not done, timer moves to AN
- * - phy_stop moves to HALTED
+ * - phy_start moves to UP
*/
enum phy_state {
PHY_DOWN = 0,
@@ -324,7 +318,6 @@ enum phy_state {
PHY_RUNNING,
PHY_NOLINK,
PHY_FORCING,
- PHY_RESUMING
};
/**
@@ -345,6 +338,7 @@ struct phy_c45_device_ids {
* is_c45: Set to true if this phy uses clause 45 addressing.
* is_internal: Set to true if this phy is internal to a MAC.
* is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc.
+ * is_gigabit_capable: Set to true if PHY supports 1000Mbps
* has_fixups: Set to true if this phy has fixups/quirks.
* suspended: Set to true if this phy has been suspended successfully.
* sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
@@ -382,6 +376,7 @@ struct phy_device {
unsigned is_c45:1;
unsigned is_internal:1;
unsigned is_pseudo_fixed_link:1;
+ unsigned is_gigabit_capable:1;
unsigned has_fixups:1;
unsigned suspended:1;
unsigned sysfs_links:1;
@@ -390,6 +385,7 @@ struct phy_device {
unsigned autoneg:1;
/* The most recently read link state */
unsigned link:1;
+ unsigned autoneg_complete:1;
/* Interrupts are enabled */
unsigned interrupts:1;
@@ -1075,6 +1071,7 @@ void phy_attached_info(struct phy_device *phydev);
/* Clause 22 PHY */
int genphy_config_init(struct phy_device *phydev);
+int genphy_read_abilities(struct phy_device *phydev);
int genphy_setup_forced(struct phy_device *phydev);
int genphy_restart_aneg(struct phy_device *phydev);
int genphy_config_eee_advert(struct phy_device *phydev);
@@ -1150,6 +1147,7 @@ void phy_request_interrupt(struct phy_device *phydev);
void phy_print_status(struct phy_device *phydev);
int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
+void phy_advertise_supported(struct phy_device *phydev);
void phy_support_sym_pause(struct phy_device *phydev);
void phy_support_asym_pause(struct phy_device *phydev);
void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h
index 7815d50c26ff..2bc51b822956 100644
--- a/include/linux/platform_data/macb.h
+++ b/include/linux/platform_data/macb.h
@@ -12,19 +12,10 @@
/**
* struct macb_platform_data - platform data for MACB Ethernet
- * @phy_mask: phy mask passed when register the MDIO bus
- * within the driver
- * @phy_irq_pin: PHY IRQ
- * @is_rmii: using RMII interface?
- * @rev_eth_addr: reverse Ethernet address byte order
* @pclk: platform clock
* @hclk: AHB clock
*/
struct macb_platform_data {
- u32 phy_mask;
- int phy_irq_pin;
- u8 is_rmii;
- u8 rev_eth_addr;
struct clk *pclk;
struct clk *hclk;
};
diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h
index 963730b44aea..21452a9365e1 100644
--- a/include/linux/platform_data/mv88e6xxx.h
+++ b/include/linux/platform_data/mv88e6xxx.h
@@ -13,6 +13,7 @@ struct dsa_mv88e6xxx_pdata {
unsigned int enabled_ports;
struct net_device *netdev;
u32 eeprom_len;
+ int irq;
};
#endif
diff --git a/include/linux/platform_data/xilinx-ll-temac.h b/include/linux/platform_data/xilinx-ll-temac.h
new file mode 100644
index 000000000000..368530f98176
--- /dev/null
+++ b/include/linux/platform_data/xilinx-ll-temac.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_XILINX_LL_TEMAC_H
+#define __LINUX_XILINX_LL_TEMAC_H
+
+#include <linux/if_ether.h>
+#include <linux/phy.h>
+
+struct ll_temac_platform_data {
+ bool txcsum; /* Enable/disable TX checksum */
+ bool rxcsum; /* Enable/disable RX checksum */
+ u8 mac_addr[ETH_ALEN]; /* MAC address (6 bytes) */
+ /* Clock frequency for input to MDIO clock generator */
+ u32 mdio_clk_freq;
+ unsigned long long mdio_bus_id; /* Unique id for MDIO bus */
+ int phy_addr; /* Address of the PHY to connect to */
+ phy_interface_t phy_interface; /* PHY interface mode */
+ bool reg_little_endian; /* Little endian TEMAC register access */
+ bool dma_little_endian; /* Little endian DMA register access */
+ /* Pre-initialized mutex to use for synchronizing indirect
+ * register access. When using both interfaces of a single
+ * TEMAC IP block, the same mutex should be passed here, as
+ * they share the same DCR bus bridge.
+ */
+ struct mutex *indirect_mutex;
+ /* DMA channel control setup */
+ u8 tx_irq_timeout; /* TX Interrupt Delay Time-out */
+ u8 tx_irq_count; /* TX Interrupt Coalescing Threshold Count */
+ u8 rx_irq_timeout; /* RX Interrupt Delay Time-out */
+ u8 rx_irq_count; /* RX Interrupt Coalescing Threshold Count */
+};
+
+#endif /* __LINUX_XILINX_LL_TEMAC_H */
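
A hedged sketch of how non-device-tree board code might fill in this structure before attaching it as platform data of the TEMAC platform device; the field values are illustrative assumptions, not defaults required by the driver:

static struct ll_temac_platform_data example_temac_pdata = {
	.txcsum		= true,
	.rxcsum		= true,
	.mac_addr	= { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
	.mdio_clk_freq	= 25000000,		/* 25 MHz reference into the MDIO divider */
	.mdio_bus_id	= 0,
	.phy_addr	= 1,
	.phy_interface	= PHY_INTERFACE_MODE_GMII,
	.tx_irq_timeout	= 10,
	.tx_irq_count	= 16,
	.rx_irq_timeout	= 10,
	.rx_irq_count	= 16,
};
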
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 763d613ce2c2..57467cbf4c5b 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -48,7 +48,6 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
* @head_offset: Offset of rhash_head in struct to be hashed
* @max_size: Maximum size while expanding
* @min_size: Minimum size while shrinking
- * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
* @automatic_shrinking: Enable automatic shrinking of tables
* @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
* @obj_hashfn: Function to hash object
@@ -62,7 +61,6 @@ struct rhashtable_params {
unsigned int max_size;
u16 min_size;
bool automatic_shrinking;
- u8 locks_mul;
rht_hashfn_t hashfn;
rht_obj_hashfn_t obj_hashfn;
rht_obj_cmpfn_t obj_cmpfn;
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index ae9c0f71f311..f7714d3b46bd 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -24,12 +24,27 @@
#include <linux/list_nulls.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>
+#include <linux/bit_spinlock.h>
#include <linux/rhashtable-types.h>
/*
+ * Objects in an rhashtable have an embedded struct rhash_head
+ * which is linked into a hash chain from the hash table - or one
+ * of two or more hash tables when the rhashtable is being resized.
* The end of the chain is marked with a special nulls marks which has
- * the least significant bit set.
+ * the least significant bit set but otherwise stores the address of
+ * the hash bucket. This allows us to be sure we've found the end
+ * of the right list.
+ * The value stored in the hash bucket has BIT(0) used as a lock bit.
+ * This bit must be atomically set before any changes are made to
+ * the chain. To avoid dereferencing this pointer without clearing
+ * the bit first, we use an opaque 'struct rhash_lock_head *' for the
+ * pointer stored in the bucket. This struct needs to be defined so
+ * that rcu_dereference() works on it, but it has no content so a
+ * cast is needed for it to be useful. This ensures it isn't
+ * used by mistake with clearing the lock bit first.
*/
+struct rhash_lock_head {};
/* Maximum chain length before rehash
*
@@ -52,8 +67,6 @@
* @nest: Number of bits of first-level nested table.
* @rehash: Current bucket being rehashed
* @hash_rnd: Random seed to fold into hash
- * @locks_mask: Mask to apply before accessing locks[]
- * @locks: Array of spinlocks protecting individual buckets
* @walkers: List of active walkers
* @rcu: RCU structure for freeing the table
* @future_tbl: Table under construction during rehashing
@@ -63,31 +76,34 @@
struct bucket_table {
unsigned int size;
unsigned int nest;
- unsigned int rehash;
u32 hash_rnd;
- unsigned int locks_mask;
- spinlock_t *locks;
struct list_head walkers;
struct rcu_head rcu;
struct bucket_table __rcu *future_tbl;
- struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
+ struct lockdep_map dep_map;
+
+ struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
/*
* NULLS_MARKER() expects a hash value with the low
* bits mostly likely to be significant, and it discards
* the msb.
- * We git it an address, in which the bottom 2 bits are
+ * We give it an address, in which the bottom bit is
* always 0, and the msb might be significant.
* So we shift the address down one bit to align with
* expectations and avoid losing a significant bit.
+ *
+ * We never store the NULLS_MARKER in the hash table
+ * itself as we need the lsb for locking.
+ * Instead we store a NULL
*/
#define RHT_NULLS_MARKER(ptr) \
((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
#define INIT_RHT_NULLS_HEAD(ptr) \
- ((ptr) = RHT_NULLS_MARKER(&(ptr)))
+ ((ptr) = NULL)
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
@@ -207,25 +223,6 @@ static inline bool rht_grow_above_max(const struct rhashtable *ht,
return atomic_read(&ht->nelems) >= ht->max_elems;
}
-/* The bucket lock is selected based on the hash and protects mutations
- * on a group of hash buckets.
- *
- * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
- * a single lock always covers both buckets which may both contains
- * entries which link to the same bucket of the old table during resizing.
- * This allows to simplify the locking as locking the bucket in both
- * tables during resize always guarantee protection.
- *
- * IMPORTANT: When holding the bucket lock of both the old and new table
- * during expansions and shrinking, the old bucket lock must always be
- * acquired first.
- */
-static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
- unsigned int hash)
-{
- return &tbl->locks[hash & tbl->locks_mask];
-}
-
#ifdef CONFIG_PROVE_LOCKING
int lockdep_rht_mutex_is_held(struct rhashtable *ht);
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
@@ -264,11 +261,13 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
void *arg);
void rhashtable_destroy(struct rhashtable *ht);
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
- unsigned int hash);
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
- struct bucket_table *tbl,
+struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+ unsigned int hash);
+struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl,
unsigned int hash);
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+ struct bucket_table *tbl,
+ unsigned int hash);
#define rht_dereference(p, ht) \
rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
@@ -285,37 +284,136 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
#define rht_entry(tpos, pos, member) \
({ tpos = container_of(pos, typeof(*tpos), member); 1; })
-static inline struct rhash_head __rcu *const *rht_bucket(
+static inline struct rhash_lock_head __rcu *const *rht_bucket(
const struct bucket_table *tbl, unsigned int hash)
{
return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
&tbl->buckets[hash];
}
-static inline struct rhash_head __rcu **rht_bucket_var(
+static inline struct rhash_lock_head __rcu **rht_bucket_var(
struct bucket_table *tbl, unsigned int hash)
{
- return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+ return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
&tbl->buckets[hash];
}
-static inline struct rhash_head __rcu **rht_bucket_insert(
+static inline struct rhash_lock_head __rcu **rht_bucket_insert(
struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
&tbl->buckets[hash];
}
+/*
+ * We lock a bucket by setting BIT(0) in the pointer - this is always
+ * zero in real pointers. The NULLS mark is never stored in the bucket,
+ * rather we store NULL if the bucket is empty.
+ * bit_spin_locks do not handle contention well, but the whole point
+ * of the hashtable design is to achieve minimum per-bucket contention.
+ * A nested hash table might not have a bucket pointer. In that case
+ * we cannot get a lock. For remove and replace a missing bucket means
+ * the object cannot be present, so no locking is needed.
+ * For insert we allocate the bucket if this is the last bucket_table,
+ * and then take the lock.
+ * Sometimes we unlock a bucket by writing a new pointer there. In that
+ * case we don't need to unlock, but we do need to reset state such as
+ * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
+ * provides the same release semantics that bit_spin_unlock() provides,
+ * this is safe.
+ * When we write to a bucket without unlocking, we use rht_assign_locked().
+ */
+
+static inline void rht_lock(struct bucket_table *tbl,
+ struct rhash_lock_head **bkt)
+{
+ local_bh_disable();
+ bit_spin_lock(0, (unsigned long *)bkt);
+ lock_map_acquire(&tbl->dep_map);
+}
+
+static inline void rht_lock_nested(struct bucket_table *tbl,
+ struct rhash_lock_head **bucket,
+ unsigned int subclass)
+{
+ local_bh_disable();
+ bit_spin_lock(0, (unsigned long *)bucket);
+ lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
+}
+
+static inline void rht_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head **bkt)
+{
+ lock_map_release(&tbl->dep_map);
+ bit_spin_unlock(0, (unsigned long *)bkt);
+ local_bh_enable();
+}
+
+/*
+ * Where 'bkt' is a bucket and might be locked:
+ * rht_ptr() dereferences that pointer and clears the lock bit.
+ * rht_ptr_exclusive() dereferences in a context where exclusive
+ * access is guaranteed, such as when destroying the table.
+ */
+static inline struct rhash_head *rht_ptr(
+ struct rhash_lock_head __rcu * const *bkt,
+ struct bucket_table *tbl,
+ unsigned int hash)
+{
+ const struct rhash_lock_head *p =
+ rht_dereference_bucket_rcu(*bkt, tbl, hash);
+
+ if ((((unsigned long)p) & ~BIT(0)) == 0)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+static inline struct rhash_head *rht_ptr_exclusive(
+ struct rhash_lock_head __rcu * const *bkt)
+{
+ const struct rhash_lock_head *p =
+ rcu_dereference_protected(*bkt, 1);
+
+ if (!p)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0)));
+}
+
+static inline void rht_assign_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ lock_map_release(&tbl->dep_map);
+ rcu_assign_pointer(*p, obj);
+ preempt_enable();
+ __release(bitlock);
+ local_bh_enable();
+}
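
Taken together with the comment above, the new helpers imply the following mutation pattern, shown here as a hedged sketch modelled on the fast-path functions later in this header; example_bucket_update() and its arguments are placeholders:

static inline void example_bucket_update(struct bucket_table *tbl,
					 unsigned int hash,
					 struct rhash_head *new_head)
{
	struct rhash_lock_head __rcu **bkt = rht_bucket_var(tbl, hash);
	struct rhash_head *pos;

	if (!bkt)			/* nested table, bucket never allocated */
		return;

	rht_lock(tbl, bkt);
	rht_for_each_from(pos, rht_ptr(bkt, tbl, hash), tbl, hash) {
		/* inspect or splice entries while holding the bit lock */
	}

	if (new_head)
		/* publish a new chain head; this also drops the lock */
		rht_assign_unlock(tbl, bkt, new_head);
	else
		rht_unlock(tbl, bkt);
}
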
+
/**
- * rht_for_each_continue - continue iterating over hash chain
+ * rht_for_each_from - iterate over hash chain from given head
* @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
+ * @head: the &struct rhash_head to start from
* @tbl: the &struct bucket_table
* @hash: the hash value / bucket index
*/
-#define rht_for_each_continue(pos, head, tbl, hash) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
- !rht_is_a_nulls(pos); \
+#define rht_for_each_from(pos, head, tbl, hash) \
+ for (pos = head; \
+ !rht_is_a_nulls(pos); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
/**
@@ -325,19 +423,20 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
* @hash: the hash value / bucket index
*/
#define rht_for_each(pos, tbl, hash) \
- rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
+ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash)
/**
- * rht_for_each_entry_continue - continue iterating over hash chain
+ * rht_for_each_entry_from - iterate over hash chain from given head
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
+ * @head: the &struct rhash_head to start from
* @tbl: the &struct bucket_table
* @hash: the hash value / bucket index
* @member: name of the &struct rhash_head within the hashable struct.
*/
-#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
+#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \
+ for (pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
@@ -350,8 +449,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
* @member: name of the &struct rhash_head within the hashable struct.
*/
#define rht_for_each_entry(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \
- tbl, hash, member)
+ rht_for_each_entry_from(tpos, pos, \
+ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash, member)
/**
* rht_for_each_entry_safe - safely iterate over hash chain of given type
@@ -366,7 +466,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
* remove the loop cursor from the list.
*/
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
- for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
+ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
next = !rht_is_a_nulls(pos) ? \
rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
@@ -375,9 +475,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
rht_dereference_bucket(pos->next, tbl, hash) : NULL)
/**
- * rht_for_each_rcu_continue - continue iterating over rcu hash chain
+ * rht_for_each_rcu_from - iterate over rcu hash chain from given head
* @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
+ * @head: the &struct rhash_head to start from
* @tbl: the &struct bucket_table
* @hash: the hash value / bucket index
*
@@ -385,9 +485,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
* the _rcu mutation primitives such as rhashtable_insert() as long as the
* traversal is guarded by rcu_read_lock().
*/
-#define rht_for_each_rcu_continue(pos, head, tbl, hash) \
+#define rht_for_each_rcu_from(pos, head, tbl, hash) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
!rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
@@ -401,14 +501,17 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
* the _rcu mutation primitives such as rhashtable_insert() as long as the
* traversal is guarded by rcu_read_lock().
*/
-#define rht_for_each_rcu(pos, tbl, hash) \
- rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
+#define rht_for_each_rcu(pos, tbl, hash) \
+ for (({barrier(); }), \
+ pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash); \
+ !rht_is_a_nulls(pos); \
+ pos = rcu_dereference_raw(pos->next))
/**
- * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
+ * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct rhash_head to use as a loop cursor.
- * @head: the previous &struct rhash_head to continue from
+ * @head: the &struct rhash_head to start from
* @tbl: the &struct bucket_table
* @hash: the hash value / bucket index
* @member: name of the &struct rhash_head within the hashable struct.
@@ -417,9 +520,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
* the _rcu mutation primitives such as rhashtable_insert() as long as the
* traversal is guarded by rcu_read_lock().
*/
-#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
+#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
@@ -436,8 +539,10 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
* traversal is guarded by rcu_read_lock().
*/
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
- tbl, hash, member)
+ rht_for_each_entry_rcu_from(tpos, pos, \
+ rht_ptr(rht_bucket(tbl, hash), \
+ tbl, hash), \
+ tbl, hash, member)
/**
* rhl_for_each_rcu - iterate over rcu hash table list
@@ -482,7 +587,7 @@ static inline struct rhash_head *__rhashtable_lookup(
.ht = ht,
.key = key,
};
- struct rhash_head __rcu * const *head;
+ struct rhash_lock_head __rcu * const *bkt;
struct bucket_table *tbl;
struct rhash_head *he;
unsigned int hash;
@@ -490,9 +595,9 @@ static inline struct rhash_head *__rhashtable_lookup(
tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
hash = rht_key_hashfn(ht, tbl, key, params);
- head = rht_bucket(tbl, hash);
+ bkt = rht_bucket(tbl, hash);
do {
- rht_for_each_rcu_continue(he, *head, tbl, hash) {
+ rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
rhashtable_compare(&arg, rht_obj(ht, he)))
@@ -502,7 +607,7 @@ restart:
/* An object might have been moved to a different hash chain,
* while we walk along it - better check and retry.
*/
- } while (he != RHT_NULLS_MARKER(head));
+ } while (he != RHT_NULLS_MARKER(bkt));
/* Ensure we see any new tables. */
smp_rmb();
@@ -598,10 +703,10 @@ static inline void *__rhashtable_insert_fast(
.ht = ht,
.key = key,
};
+ struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct bucket_table *tbl;
struct rhash_head *head;
- spinlock_t *lock;
unsigned int hash;
int elasticity;
void *data;
@@ -610,23 +715,22 @@ static inline void *__rhashtable_insert_fast(
tbl = rht_dereference_rcu(ht->tbl, ht);
hash = rht_head_hashfn(ht, tbl, obj, params);
- lock = rht_bucket_lock(tbl, hash);
- spin_lock_bh(lock);
+ elasticity = RHT_ELASTICITY;
+ bkt = rht_bucket_insert(ht, tbl, hash);
+ data = ERR_PTR(-ENOMEM);
+ if (!bkt)
+ goto out;
+ pprev = NULL;
+ rht_lock(tbl, bkt);
if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
slow_path:
- spin_unlock_bh(lock);
+ rht_unlock(tbl, bkt);
rcu_read_unlock();
return rhashtable_insert_slow(ht, key, obj);
}
- elasticity = RHT_ELASTICITY;
- pprev = rht_bucket_insert(ht, tbl, hash);
- data = ERR_PTR(-ENOMEM);
- if (!pprev)
- goto out;
-
- rht_for_each_continue(head, *pprev, tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *plist;
struct rhlist_head *list;
@@ -642,7 +746,7 @@ slow_path:
data = rht_obj(ht, head);
if (!rhlist)
- goto out;
+ goto out_unlock;
list = container_of(obj, struct rhlist_head, rhead);
@@ -651,9 +755,13 @@ slow_path:
RCU_INIT_POINTER(list->next, plist);
head = rht_dereference_bucket(head->next, tbl, hash);
RCU_INIT_POINTER(list->rhead.next, head);
- rcu_assign_pointer(*pprev, obj);
-
- goto good;
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj);
+ rht_unlock(tbl, bkt);
+ } else
+ rht_assign_unlock(tbl, bkt, obj);
+ data = NULL;
+ goto out;
}
if (elasticity <= 0)
@@ -661,12 +769,13 @@ slow_path:
data = ERR_PTR(-E2BIG);
if (unlikely(rht_grow_above_max(ht, tbl)))
- goto out;
+ goto out_unlock;
if (unlikely(rht_grow_above_100(ht, tbl)))
goto slow_path;
- head = rht_dereference_bucket(*pprev, tbl, hash);
+ /* Inserting at head of list makes unlocking free. */
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (rhlist) {
@@ -676,20 +785,21 @@ slow_path:
RCU_INIT_POINTER(list->next, NULL);
}
- rcu_assign_pointer(*pprev, obj);
-
atomic_inc(&ht->nelems);
+ rht_assign_unlock(tbl, bkt, obj);
+
if (rht_grow_above_75(ht, tbl))
schedule_work(&ht->run_work);
-good:
data = NULL;
-
out:
- spin_unlock_bh(lock);
rcu_read_unlock();
return data;
+
+out_unlock:
+ rht_unlock(tbl, bkt);
+ goto out;
}
/**
@@ -698,9 +808,9 @@ out:
* @obj: pointer to hash head inside object
* @params: hash table parameters
*
- * Will take a per bucket spinlock to protect against mutual mutations
+ * Will take the per bucket bitlock to protect against mutual mutations
* on the same bucket. Multiple insertions may occur in parallel unless
- * they map to the same bucket lock.
+ * they map to the same bucket.
*
* It is safe to call this function from atomic context.
*
@@ -727,9 +837,9 @@ static inline int rhashtable_insert_fast(
* @list: pointer to hash list head inside object
* @params: hash table parameters
*
- * Will take a per bucket spinlock to protect against mutual mutations
+ * Will take the per bucket bitlock to protect against mutual mutations
* on the same bucket. Multiple insertions may occur in parallel unless
- * they map to the same bucket lock.
+ * they map to the same bucket.
*
* It is safe to call this function from atomic context.
*
@@ -750,9 +860,9 @@ static inline int rhltable_insert_key(
* @list: pointer to hash list head inside object
* @params: hash table parameters
*
- * Will take a per bucket spinlock to protect against mutual mutations
+ * Will take the per bucket bitlock to protect against mutual mutations
* on the same bucket. Multiple insertions may occur in parallel unless
- * they map to the same bucket lock.
+ * they map to the same bucket.
*
* It is safe to call this function from atomic context.
*
@@ -776,12 +886,6 @@ static inline int rhltable_insert(
* @obj: pointer to hash head inside object
* @params: hash table parameters
*
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
* This lookup function may only be used for fixed key hash table (key_len
* parameter set). It will BUG() if used inappropriately.
*
@@ -837,12 +941,6 @@ static inline void *rhashtable_lookup_get_insert_fast(
* @obj: pointer to hash head inside object
* @params: hash table parameters
*
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
* Lookups may occur in parallel with hashtable mutations and resizing.
*
* Will trigger an automatic deferred table resizing if residency in the
@@ -891,19 +989,20 @@ static inline int __rhashtable_remove_fast_one(
struct rhash_head *obj, const struct rhashtable_params params,
bool rhlist)
{
+ struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
- spinlock_t * lock;
unsigned int hash;
int err = -ENOENT;
hash = rht_head_hashfn(ht, tbl, obj, params);
- lock = rht_bucket_lock(tbl, hash);
-
- spin_lock_bh(lock);
+ bkt = rht_bucket_var(tbl, hash);
+ if (!bkt)
+ return -ENOENT;
+ pprev = NULL;
+ rht_lock(tbl, bkt);
- pprev = rht_bucket_var(tbl, hash);
- rht_for_each_continue(he, *pprev, tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
list = container_of(he, struct rhlist_head, rhead);
@@ -943,12 +1042,17 @@ static inline int __rhashtable_remove_fast_one(
}
}
- rcu_assign_pointer(*pprev, obj);
- break;
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj);
+ rht_unlock(tbl, bkt);
+ } else {
+ rht_assign_unlock(tbl, bkt, obj);
+ }
+ goto unlocked;
}
- spin_unlock_bh(lock);
-
+ rht_unlock(tbl, bkt);
+unlocked:
if (err > 0) {
atomic_dec(&ht->nelems);
if (unlikely(ht->p.automatic_shrinking &&
@@ -1037,9 +1141,9 @@ static inline int __rhashtable_replace_fast(
struct rhash_head *obj_old, struct rhash_head *obj_new,
const struct rhashtable_params params)
{
+ struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
- spinlock_t *lock;
unsigned int hash;
int err = -ENOENT;
@@ -1050,25 +1154,33 @@ static inline int __rhashtable_replace_fast(
if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
return -EINVAL;
- lock = rht_bucket_lock(tbl, hash);
+ bkt = rht_bucket_var(tbl, hash);
+ if (!bkt)
+ return -ENOENT;
- spin_lock_bh(lock);
+ pprev = NULL;
+ rht_lock(tbl, bkt);
- pprev = rht_bucket_var(tbl, hash);
- rht_for_each_continue(he, *pprev, tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (he != obj_old) {
pprev = &he->next;
continue;
}
rcu_assign_pointer(obj_new->next, obj_old->next);
- rcu_assign_pointer(*pprev, obj_new);
+ if (pprev) {
+ rcu_assign_pointer(*pprev, obj_new);
+ rht_unlock(tbl, bkt);
+ } else {
+ rht_assign_unlock(tbl, bkt, obj_new);
+ }
err = 0;
- break;
+ goto unlocked;
}
- spin_unlock_bh(lock);
+ rht_unlock(tbl, bkt);
+unlocked:
return err;
}
diff --git a/include/linux/siphash.h b/include/linux/siphash.h
index fa7a6b9cedbf..bf21591a9e5e 100644
--- a/include/linux/siphash.h
+++ b/include/linux/siphash.h
@@ -21,6 +21,11 @@ typedef struct {
u64 key[2];
} siphash_key_t;
+static inline bool siphash_key_is_zero(const siphash_key_t *key)
+{
+ return !(key->key[0] | key->key[1]);
+}
+
u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9027a8c4219f..6d58fa8a65fd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -657,7 +657,6 @@ typedef unsigned char *sk_buff_data_t;
* @tc_index: Traffic control index
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
- * @xmit_more: More SKBs are pending for this queue
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
* @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
@@ -764,7 +763,6 @@ struct sk_buff {
fclone:2,
peeked:1,
head_frag:1,
- xmit_more:1,
pfmemalloc:1;
#ifdef CONFIG_SKB_EXTENSIONS
__u8 active_extensions;
@@ -1044,6 +1042,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
int node);
struct sk_buff *__build_skb(void *data, unsigned int frag_size);
struct sk_buff *build_skb(void *data, unsigned int frag_size);
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+ void *data, unsigned int frag_size);
/**
* alloc_skb - allocate a network buffer
@@ -1258,11 +1258,19 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
unsigned int key_count);
#ifdef CONFIG_NET
+int skb_flow_dissector_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog);
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
#else
+static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog)
{
@@ -1275,12 +1283,12 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
}
#endif
-struct bpf_flow_keys;
-bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
- const struct sk_buff *skb,
- struct flow_dissector *flow_dissector,
- struct bpf_flow_keys *flow_keys);
-bool __skb_flow_dissect(const struct sk_buff *skb,
+struct bpf_flow_dissector;
+bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+ __be16 proto, int nhoff, int hlen);
+
+bool __skb_flow_dissect(const struct net *net,
+ const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
void *data, __be16 proto, int nhoff, int hlen,
@@ -1290,8 +1298,8 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, unsigned int flags)
{
- return __skb_flow_dissect(skb, flow_dissector, target_container,
- NULL, 0, 0, 0, flags);
+ return __skb_flow_dissect(NULL, skb, flow_dissector,
+ target_container, NULL, 0, 0, 0, flags);
}
static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
@@ -1299,18 +1307,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
- return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
- NULL, 0, 0, 0, flags);
+ return __skb_flow_dissect(NULL, skb, &flow_keys_dissector,
+ flow, NULL, 0, 0, 0, flags);
}
static inline bool
-skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
+skb_flow_dissect_flow_keys_basic(const struct net *net,
+ const struct sk_buff *skb,
struct flow_keys_basic *flow, void *data,
__be16 proto, int nhoff, int hlen,
unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
- return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
+ return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
data, proto, nhoff, hlen, flags);
}
@@ -2102,8 +2111,6 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize);
-#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
-#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb))
#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
#ifdef NET_SKBUFF_DATA_USES_OFFSET
@@ -2490,7 +2497,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb)
if (skb_transport_header_was_set(skb))
return;
- if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
+ if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+ NULL, 0, 0, 0, 0))
skb_set_transport_header(skb, keys.control.thoff);
}
@@ -3372,17 +3380,17 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err,
+ int *off, int *err,
struct sk_buff **last);
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err,
+ int *off, int *err,
struct sk_buff **last);
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
- int *peeked, int *off, int *err);
+ int *off, int *err);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
int *err);
__poll_t datagram_poll(struct file *file, struct socket *sock,
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 49ba9cde7e4b..b29950a19205 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
struct tracepoint *tp;
void *bpf_func;
u32 num_args;
+ u32 writable_size;
} __aligned(32);
#endif
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 269ec27385e9..2f67ae854ff0 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -238,53 +238,6 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
void ipv6_mc_dad_complete(struct inet6_dev *idev);
-/* A stub used by vxlan module. This is ugly, ideally these
- * symbols should be built into the core kernel.
- */
-struct ipv6_stub {
- int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex,
- const struct in6_addr *addr);
- int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
- const struct in6_addr *addr);
- int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
- struct dst_entry **dst, struct flowi6 *fl6);
- int (*ipv6_route_input)(struct sk_buff *skb);
-
- struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
- struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
- struct flowi6 *fl6, int flags);
- struct fib6_info *(*fib6_table_lookup)(struct net *net,
- struct fib6_table *table,
- int oif, struct flowi6 *fl6,
- int flags);
- struct fib6_info *(*fib6_multipath_select)(const struct net *net,
- struct fib6_info *f6i,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb,
- int strict);
- u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr);
-
- void (*udpv6_encap_enable)(void);
- void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
- const struct in6_addr *solicited_addr,
- bool router, bool solicited, bool override, bool inc_opt);
- struct neigh_table *nd_tbl;
-};
-extern const struct ipv6_stub *ipv6_stub __read_mostly;
-
-/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
-struct ipv6_bpf_stub {
- int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock);
- struct sock *(*udp6_lib_lookup)(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif, int sdif, struct udp_table *tbl,
- struct sk_buff *skb);
-};
-extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
-
/*
* identify MLD packets for MLD filter exceptions
*/
@@ -425,6 +378,14 @@ static inline void in6_dev_hold(struct inet6_dev *idev)
refcount_inc(&idev->refcnt);
}
+/* called with rcu_read_lock held */
+static inline bool ip6_ignore_linkdown(const struct net_device *dev)
+{
+ const struct inet6_dev *idev = __in6_dev_get(dev);
+
+ return !!idev->cnf.ignore_routes_with_linkdown;
+}
+
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
static inline void in6_ifa_put(struct inet6_ifaddr *ifp)
diff --git a/include/net/arp.h b/include/net/arp.h
index 977aabfcdc03..c8f580a0e6b1 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -18,6 +18,7 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32
return val * hash_rnd[0];
}
+#ifdef CONFIG_INET
static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
{
if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
@@ -25,6 +26,13 @@ static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev
return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
}
+#else
+static inline
+struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
+{
+ return NULL;
+}
+#endif
static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key)
{
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fbba43e9bef5..9a5330eed794 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -282,6 +282,7 @@ enum {
HCI_FORCE_BREDR_SMP,
HCI_FORCE_STATIC_ADDR,
HCI_LL_RPA_RESOLUTION,
+ HCI_CMD_PENDING,
__HCI_NUM_FLAGS,
};
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 094e61e07030..05b1b96f4d9e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -190,6 +190,9 @@ struct adv_info {
#define HCI_MAX_SHORT_NAME_LENGTH 10
+/* Min encryption key size to match with SMP */
+#define HCI_MIN_ENC_KEY_SIZE 7
+
/* Default LE RPA expiry time, 15 minutes */
#define HCI_DEFAULT_RPA_TIMEOUT (15 * 60)
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
new file mode 100644
index 000000000000..b9dcb02e756b
--- /dev/null
+++ b/include/net/bpf_sk_storage.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Facebook */
+#ifndef _BPF_SK_STORAGE_H
+#define _BPF_SK_STORAGE_H
+
+struct sock;
+
+void bpf_sk_storage_free(struct sock *sk);
+
+extern const struct bpf_func_proto bpf_sk_storage_get_proto;
+extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+
+#endif /* _BPF_SK_STORAGE_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 13bfeb712d36..87dae868707e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -485,6 +485,7 @@ struct vif_params {
* with the get_key() callback, must be in little endian,
* length given by @seq_len.
* @seq_len: length of @seq.
+ * @mode: key install mode (RX_TX, NO_TX or SET_TX)
*/
struct key_params {
const u8 *key;
@@ -492,6 +493,7 @@ struct key_params {
int key_len;
int seq_len;
u32 cipher;
+ enum nl80211_key_mode mode;
};
/**
@@ -973,6 +975,27 @@ enum station_parameters_apply_mask {
STATION_PARAM_APPLY_UAPSD = BIT(0),
STATION_PARAM_APPLY_CAPABILITY = BIT(1),
STATION_PARAM_APPLY_PLINK_STATE = BIT(2),
+ STATION_PARAM_APPLY_STA_TXPOWER = BIT(3),
+};
+
+/**
+ * struct sta_txpwr - station txpower configuration
+ *
+ * Used to configure txpower for station.
+ *
+ * @power: tx power (in dBm) to be used for sending data traffic. If tx power
+ * is not provided, the default per-interface tx power setting will be used
+ * instead. The driver should pick the lowest of the per-interface and
+ * per-station tx power.
+ * @type: In particular if TPC %type is NL80211_TX_POWER_LIMITED then tx power
+ * will be less than or equal to the value specified from userspace, whereas
+ * if TPC %type is NL80211_TX_POWER_AUTOMATIC then it indicates the default
+ * tx power. NL80211_TX_POWER_FIXED is not a valid configuration option for
+ * per peer TPC.
+ */
+struct sta_txpwr {
+ s16 power;
+ enum nl80211_tx_power_setting type;
};
/**
@@ -1047,6 +1070,7 @@ struct station_parameters {
const struct ieee80211_he_cap_elem *he_capa;
u8 he_capa_len;
u16 airtime_weight;
+ struct sta_txpwr txpwr;
};
/**
@@ -1327,6 +1351,7 @@ struct cfg80211_tid_stats {
* @fcs_err_count: number of packets (MPDUs) received from this station with
* an FCS error. This counter should be incremented only when TA of the
* received packet with an FCS error matches the peer MAC address.
+ * @airtime_link_metric: mesh airtime link metric.
*/
struct station_info {
u64 filled;
@@ -1381,6 +1406,8 @@ struct station_info {
u32 rx_mpdu_count;
u32 fcs_err_count;
+
+ u32 airtime_link_metric;
};
#if IS_ENABLED(CONFIG_CFG80211)
@@ -1832,11 +1859,19 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
* @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match
* or no match (RSSI only)
* @rssi_thold: don't report scan results below this threshold (in s32 dBm)
+ * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied
+ * for filtering out received scan results. Drivers advertise support for
+ * band-specific rssi filtering through the feature capability
+ * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band
+ * specific rssi thresholds take precedence over rssi_thold, if specified.
+ * If not specified for a band, the rssi_thold of the corresponding
+ * matchset is used for that band.
*/
struct cfg80211_match_set {
struct cfg80211_ssid ssid;
u8 bssid[ETH_ALEN];
s32 rssi_thold;
+ s32 per_band_rssi_thold[NUM_NL80211_BANDS];
};
/**
@@ -3100,6 +3135,32 @@ struct cfg80211_pmsr_request {
};
/**
+ * struct cfg80211_update_owe_info - OWE Information
+ *
+ * This structure provides information needed for the drivers to offload OWE
+ * (Opportunistic Wireless Encryption) processing to the user space.
+ *
+ * Commonly used across update_owe_info request and event interfaces.
+ *
+ * @peer: MAC address of the peer device for which the OWE processing
+ * has to be done.
+ * @status: status code, %WLAN_STATUS_SUCCESS for successful OWE info
+ * processing, use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space
+ * cannot give you the real status code for failures. Used only for
+ * OWE update request command interface (user space to driver).
+ * @ie: IEs obtained from the peer or constructed by the user space. These are
+ * the IEs of the remote peer in the event from the host driver and
+ * the constructed IEs by the user space in the request interface.
+ * @ie_len: Length of IEs in octets.
+ */
+struct cfg80211_update_owe_info {
+ u8 peer[ETH_ALEN] __aligned(2);
+ u16 status;
+ const u8 *ie;
+ size_t ie_len;
+};
+
+/**
* struct cfg80211_ops - backend description for wireless configuration
*
* This struct is registered by fullmac card drivers and/or wireless stacks
@@ -3436,6 +3497,13 @@ struct cfg80211_pmsr_request {
* Statistics should be cumulative, currently no way to reset is provided.
* @start_pmsr: start peer measurement (e.g. FTM)
* @abort_pmsr: abort peer measurement
+ *
+ * @update_owe_info: Provide updated OWE info to the driver. A driver that
+ * implements SME but offloads OWE processing to user space will get the
+ * updated DH IE through this interface.
+ *
+ * @probe_mesh_link: Probe a direct mesh peer's link quality by sending a data
+ * frame and overruling the HWMP path selection algorithm.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3750,6 +3818,10 @@ struct cfg80211_ops {
struct cfg80211_pmsr_request *request);
void (*abort_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev,
struct cfg80211_pmsr_request *request);
+ int (*update_owe_info)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_update_owe_info *owe_info);
+ int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *buf, size_t len);
};
/*
@@ -5492,6 +5564,28 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid,
}
/**
+ * cfg80211_is_element_inherited - returns if element ID should be inherited
+ * @element: element to check
+ * @non_inherit_element: non inheritance element
+ */
+bool cfg80211_is_element_inherited(const struct element *element,
+ const struct element *non_inherit_element);
+
+/**
+ * cfg80211_merge_profile - merges a MBSSID profile if it is split between IEs
+ * @ie: ies
+ * @ielen: length of IEs
+ * @mbssid_elem: current MBSSID element
+ * @sub_elem: current MBSSID subelement (profile)
+ * @merged_ie: location of the merged profile
+ * @max_copy_len: max merged profile length
+ */
+size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
+ const struct element *mbssid_elem,
+ const struct element *sub_elem,
+ u8 *merged_ie, size_t max_copy_len);
+
+/**
* enum cfg80211_bss_frame_type - frame type that the BSS data came from
* @CFG80211_BSS_FTYPE_UNKNOWN: driver doesn't know whether the data is
* from a beacon or probe response
@@ -7213,4 +7307,14 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
#define wiphy_WARN(wiphy, format, args...) \
WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args);
+/**
+ * cfg80211_update_owe_info_event - Notify the peer's OWE info to user space
+ * @netdev: network device
+ * @owe_info: peer's owe info
+ * @gfp: allocation flags
+ */
+void cfg80211_update_owe_info_event(struct net_device *netdev,
+ struct cfg80211_update_owe_info *owe_info,
+ gfp_t gfp);
+
#endif /* __NET_CFG80211_H */
diff --git a/include/net/compat.h b/include/net/compat.h
index 4c6d75612b6c..f277653c7e17 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -30,9 +30,6 @@ struct compat_cmsghdr {
compat_int_t cmsg_type;
};
-int compat_sock_get_timestamp(struct sock *, struct timeval __user *);
-int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
-
#else /* defined(CONFIG_COMPAT) */
/*
* To avoid compiler warnings:
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 63de99e09f04..1c4adfb4195a 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/netdevice.h>
+#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <uapi/linux/devlink.h>
@@ -31,6 +32,7 @@ struct devlink {
struct list_head region_list;
u32 snapshot_id;
struct list_head reporter_list;
+ struct mutex reporters_lock; /* protects reporter_list */
struct devlink_dpipe_headers *dpipe_headers;
const struct devlink_ops *ops;
struct device *dev;
@@ -40,11 +42,13 @@ struct devlink {
};
struct devlink_port_attrs {
- bool set;
+ u8 set:1,
+ split:1,
+ switch_port:1;
enum devlink_port_flavour flavour;
u32 port_number; /* same value as "split group" */
- bool split;
u32 split_subport_number;
+ struct netdev_phys_item_id switch_id;
};
struct devlink_port {
@@ -53,6 +57,9 @@ struct devlink_port {
struct devlink *devlink;
unsigned index;
bool registered;
+ spinlock_t type_lock; /* Protects type and type_dev
+ * pointer consistency.
+ */
enum devlink_port_type type;
enum devlink_port_type desired_type;
void *type_dev;
@@ -485,13 +492,14 @@ struct devlink_ops {
struct devlink_sb_pool_info *pool_info);
int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index,
u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type);
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack);
int (*sb_port_pool_get)(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int (*sb_port_pool_set)(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
- u32 threshold);
+ u32 threshold, struct netlink_ext_ack *extack);
int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port,
unsigned int sb_index,
u16 tc_index,
@@ -501,7 +509,8 @@ struct devlink_ops {
unsigned int sb_index,
u16 tc_index,
enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold);
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack);
int (*sb_occ_snapshot)(struct devlink *devlink,
unsigned int sb_index);
int (*sb_occ_max_clear)(struct devlink *devlink,
@@ -543,19 +552,25 @@ static inline struct devlink *priv_to_devlink(void *priv)
return container_of(priv, struct devlink, priv);
}
+static inline struct devlink_port *
+netdev_to_devlink_port(struct net_device *dev)
+{
+ if (dev->netdev_ops->ndo_get_devlink_port)
+ return dev->netdev_ops->ndo_get_devlink_port(dev);
+ return NULL;
+}
+
static inline struct devlink *netdev_to_devlink(struct net_device *dev)
{
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
- if (dev->netdev_ops->ndo_get_devlink)
- return dev->netdev_ops->ndo_get_devlink(dev);
-#endif
+ struct devlink_port *devlink_port = netdev_to_devlink_port(dev);
+
+ if (devlink_port)
+ return devlink_port->devlink;
return NULL;
}
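For illustration only: a driver that wants the helpers above to resolve its port would implement the new ndo_get_devlink_port callback, roughly as in this sketch (the "foo" names are hypothetical):

struct foo_priv {
	struct devlink_port devlink_port;	/* registered via devlink_port_register() */
};

static struct devlink_port *foo_get_devlink_port(struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);

	return &priv->devlink_port;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_get_devlink_port	= foo_get_devlink_port,
};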
struct ib_device;
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
-
struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size);
int devlink_register(struct devlink *devlink, struct device *dev);
void devlink_unregister(struct devlink *devlink);
@@ -572,9 +587,9 @@ void devlink_port_type_clear(struct devlink_port *devlink_port);
void devlink_port_attrs_set(struct devlink_port *devlink_port,
enum devlink_port_flavour flavour,
u32 port_number, bool split,
- u32 split_subport_number);
-int devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
- char *name, size_t len);
+ u32 split_subport_number,
+ const unsigned char *switch_id,
+ unsigned char switch_id_len);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
@@ -724,510 +739,43 @@ void
devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state state);
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+
void devlink_compat_running_version(struct net_device *dev,
char *buf, size_t len);
int devlink_compat_flash_update(struct net_device *dev, const char *file_name);
+int devlink_compat_phys_port_name_get(struct net_device *dev,
+ char *name, size_t len);
+int devlink_compat_switch_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *ppid);
#else
-static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
- size_t priv_size)
-{
- return kzalloc(sizeof(struct devlink) + priv_size, GFP_KERNEL);
-}
-
-static inline int devlink_register(struct devlink *devlink, struct device *dev)
-{
- return 0;
-}
-
-static inline void devlink_unregister(struct devlink *devlink)
-{
-}
-
-static inline void devlink_params_publish(struct devlink *devlink)
-{
-}
-
-static inline void devlink_params_unpublish(struct devlink *devlink)
-{
-}
-
-static inline void devlink_free(struct devlink *devlink)
-{
- kfree(devlink);
-}
-
-static inline int devlink_port_register(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index)
-{
- return 0;
-}
-
-static inline void devlink_port_unregister(struct devlink_port *devlink_port)
-{
-}
-
-static inline void devlink_port_type_eth_set(struct devlink_port *devlink_port,
- struct net_device *netdev)
-{
-}
-
-static inline void devlink_port_type_ib_set(struct devlink_port *devlink_port,
- struct ib_device *ibdev)
-{
-}
-
-static inline void devlink_port_type_clear(struct devlink_port *devlink_port)
-{
-}
-
-static inline void devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour,
- u32 port_number, bool split,
- u32 split_subport_number)
-{
-}
-
-static inline int
-devlink_port_get_phys_port_name(struct devlink_port *devlink_port,
- char *name, size_t len)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int devlink_sb_register(struct devlink *devlink,
- unsigned int sb_index, u32 size,
- u16 ingress_pools_count,
- u16 egress_pools_count,
- u16 ingress_tc_count,
- u16 egress_tc_count)
-{
- return 0;
-}
-
-static inline void devlink_sb_unregister(struct devlink *devlink,
- unsigned int sb_index)
-{
-}
-
-static inline int
-devlink_dpipe_table_register(struct devlink *devlink,
- const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
- void *priv, bool counter_control_extern)
-{
- return 0;
-}
-
-static inline void devlink_dpipe_table_unregister(struct devlink *devlink,
- const char *table_name)
-{
-}
-
-static inline int devlink_dpipe_headers_register(struct devlink *devlink,
- struct devlink_dpipe_headers *
- dpipe_headers)
-{
- return 0;
-}
-
-static inline void devlink_dpipe_headers_unregister(struct devlink *devlink)
-{
-}
-
-static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
- const char *table_name)
-{
- return false;
-}
-
-static inline int
-devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
- return 0;
-}
-
-static inline int
-devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
- struct devlink_dpipe_entry *entry)
-{
- return 0;
-}
-
-static inline int
-devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
- return 0;
-}
-
-static inline void
-devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
-{
-}
-
-static inline int
-devlink_dpipe_action_put(struct sk_buff *skb,
- struct devlink_dpipe_action *action)
-{
- return 0;
-}
-
-static inline int
-devlink_dpipe_match_put(struct sk_buff *skb,
- struct devlink_dpipe_match *match)
-{
- return 0;
-}
-
-static inline int
-devlink_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
-{
- return 0;
-}
-
static inline void
-devlink_resources_unregister(struct devlink *devlink,
- struct devlink_resource *resource)
-{
-}
-
-static inline int
-devlink_resource_size_get(struct devlink *devlink, u64 resource_id,
- u64 *p_resource_size)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int
-devlink_dpipe_table_resource_set(struct devlink *devlink,
- const char *table_name, u64 resource_id,
- u64 resource_units)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void
-devlink_resource_occ_get_register(struct devlink *devlink,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
-{
-}
-
-static inline void
-devlink_resource_occ_get_unregister(struct devlink *devlink,
- u64 resource_id)
-{
-}
-
-static inline int
-devlink_params_register(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- return 0;
-}
-
-static inline void
-devlink_params_unregister(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
-
-}
-
-static inline int
-devlink_port_params_register(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
- return 0;
-}
-
-static inline void
-devlink_port_params_unregister(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
-}
-
-static inline int
-devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *init_val)
+devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
{
- return -EOPNOTSUPP;
}
static inline int
-devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
- union devlink_param_value init_val)
+devlink_compat_flash_update(struct net_device *dev, const char *file_name)
{
return -EOPNOTSUPP;
}
static inline int
-devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value *init_val)
+devlink_compat_phys_port_name_get(struct net_device *dev,
+ char *name, size_t len)
{
return -EOPNOTSUPP;
}
static inline int
-devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value init_val)
+devlink_compat_switch_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *ppid)
{
return -EOPNOTSUPP;
}
-static inline void
-devlink_param_value_changed(struct devlink *devlink, u32 param_id)
-{
-}
-
-static inline void
-devlink_port_param_value_changed(struct devlink_port *devlink_port,
- u32 param_id)
-{
-}
-
-static inline void
-devlink_param_value_str_fill(union devlink_param_value *dst_val,
- const char *src)
-{
-}
-
-static inline struct devlink_region *
-devlink_region_create(struct devlink *devlink,
- const char *region_name,
- u32 region_max_snapshots,
- u64 region_size)
-{
- return NULL;
-}
-
-static inline void
-devlink_region_destroy(struct devlink_region *region)
-{
-}
-
-static inline u32
-devlink_region_shapshot_id_get(struct devlink *devlink)
-{
- return 0;
-}
-
-static inline int
-devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
- u8 *data, u32 snapshot_id,
- devlink_snapshot_data_dest_t *data_destructor)
-{
- return 0;
-}
-
-static inline int
-devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
-{
- return 0;
-}
-
-static inline int
-devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
-{
- return 0;
-}
-
-static inline int
-devlink_info_version_fixed_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return 0;
-}
-
-static inline int
-devlink_info_version_stored_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return 0;
-}
-
-static inline int
-devlink_info_version_running_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
- u16 value_len)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
- bool value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u8 value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u32 value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u64 value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const char *value)
-{
- return 0;
-}
-
-static inline int
-devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const void *value, u16 value_len)
-{
- return 0;
-}
-
-static inline struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, bool auto_recover,
- void *priv)
-{
- return NULL;
-}
-
-static inline void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
-}
-
-static inline void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
-{
- return NULL;
-}
-
-static inline int
-devlink_health_report(struct devlink_health_reporter *reporter,
- const char *msg, void *priv_ctx)
-{
- return 0;
-}
-
-static inline void
-devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
- enum devlink_health_reporter_state state)
-{
-}
-
-static inline void
-devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
-{
-}
-
-static inline int
-devlink_compat_flash_update(struct net_device *dev, const char *file_name)
-{
- return -EOPNOTSUPP;
-}
#endif
#endif /* _NET_DEVLINK_H_ */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index ae480bba11f5..6aaaadd6a413 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -30,20 +30,36 @@ struct phy_device;
struct fixed_phy_status;
struct phylink_link_state;
+#define DSA_TAG_PROTO_NONE_VALUE 0
+#define DSA_TAG_PROTO_BRCM_VALUE 1
+#define DSA_TAG_PROTO_BRCM_PREPEND_VALUE 2
+#define DSA_TAG_PROTO_DSA_VALUE 3
+#define DSA_TAG_PROTO_EDSA_VALUE 4
+#define DSA_TAG_PROTO_GSWIP_VALUE 5
+#define DSA_TAG_PROTO_KSZ9477_VALUE 6
+#define DSA_TAG_PROTO_KSZ9893_VALUE 7
+#define DSA_TAG_PROTO_LAN9303_VALUE 8
+#define DSA_TAG_PROTO_MTK_VALUE 9
+#define DSA_TAG_PROTO_QCA_VALUE 10
+#define DSA_TAG_PROTO_TRAILER_VALUE 11
+#define DSA_TAG_PROTO_8021Q_VALUE 12
+#define DSA_TAG_PROTO_SJA1105_VALUE 13
+
enum dsa_tag_protocol {
- DSA_TAG_PROTO_NONE = 0,
- DSA_TAG_PROTO_BRCM,
- DSA_TAG_PROTO_BRCM_PREPEND,
- DSA_TAG_PROTO_DSA,
- DSA_TAG_PROTO_EDSA,
- DSA_TAG_PROTO_GSWIP,
- DSA_TAG_PROTO_KSZ9477,
- DSA_TAG_PROTO_KSZ9893,
- DSA_TAG_PROTO_LAN9303,
- DSA_TAG_PROTO_MTK,
- DSA_TAG_PROTO_QCA,
- DSA_TAG_PROTO_TRAILER,
- DSA_TAG_LAST, /* MUST BE LAST */
+ DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
+ DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE,
+ DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE,
+ DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE,
+ DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE,
+ DSA_TAG_PROTO_GSWIP = DSA_TAG_PROTO_GSWIP_VALUE,
+ DSA_TAG_PROTO_KSZ9477 = DSA_TAG_PROTO_KSZ9477_VALUE,
+ DSA_TAG_PROTO_KSZ9893 = DSA_TAG_PROTO_KSZ9893_VALUE,
+ DSA_TAG_PROTO_LAN9303 = DSA_TAG_PROTO_LAN9303_VALUE,
+ DSA_TAG_PROTO_MTK = DSA_TAG_PROTO_MTK_VALUE,
+ DSA_TAG_PROTO_QCA = DSA_TAG_PROTO_QCA_VALUE,
+ DSA_TAG_PROTO_TRAILER = DSA_TAG_PROTO_TRAILER_VALUE,
+ DSA_TAG_PROTO_8021Q = DSA_TAG_PROTO_8021Q_VALUE,
+ DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE,
};
struct packet_type;
@@ -55,9 +71,52 @@ struct dsa_device_ops {
struct packet_type *pt);
int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
int *offset);
+ /* Used to determine which traffic should match the DSA filter in
+ * eth_type_trans, and which, if any, should bypass it and be processed
+	 * as regular traffic on the master net device.
+ */
+ bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
unsigned int overhead;
+ const char *name;
+ enum dsa_tag_protocol proto;
+};
+
+#define DSA_TAG_DRIVER_ALIAS "dsa_tag-"
+#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \
+ MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE))
+
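As a rough sketch of how the alias is intended to be used (illustrative, not part of the diff): a tag driver advertises the protocol value it implements, so the DSA core can request the module by number.

/* expands to roughly MODULE_ALIAS("dsa_tag-13"), since
 * DSA_TAG_PROTO_SJA1105_VALUE is stringified after the "dsa_tag-" prefix
 */
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105);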
+struct dsa_skb_cb {
+ struct sk_buff *clone;
+ bool deferred_xmit;
+};
+
+struct __dsa_skb_cb {
+ struct dsa_skb_cb cb;
+ u8 priv[48 - sizeof(struct dsa_skb_cb)];
};
+#define __DSA_SKB_CB(skb) ((struct __dsa_skb_cb *)((skb)->cb))
+
+#define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb))
+
+#define DSA_SKB_CB_COPY(nskb, skb) \
+ { *__DSA_SKB_CB(nskb) = *__DSA_SKB_CB(skb); }
+
+#define DSA_SKB_CB_ZERO(skb) \
+ { *__DSA_SKB_CB(skb) = (struct __dsa_skb_cb) {0}; }
+
+#define DSA_SKB_CB_PRIV(skb) \
+ ((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv))
+
+#define DSA_SKB_CB_CLONE(_clone, _skb) \
+ { \
+ struct sk_buff *clone = _clone; \
+ struct sk_buff *skb = _skb; \
+ \
+ DSA_SKB_CB_COPY(clone, skb); \
+ DSA_SKB_CB(skb)->clone = clone; \
+ }
+
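A usage sketch (illustrative only; the "foo" names are hypothetical): a tagger can keep per-frame scratch data in the priv[] area that struct __dsa_skb_cb leaves free inside the 48-byte skb->cb, reached through DSA_SKB_CB_PRIV().

struct foo_tag_data {		/* must fit within __dsa_skb_cb::priv */
	u16 tx_vid;
	u8 pcp;
};

static void foo_remember_tag(struct sk_buff *skb, u16 vid, u8 pcp)
{
	struct foo_tag_data *data = DSA_SKB_CB_PRIV(skb);

	data->tx_vid = vid;
	data->pcp = pcp;
}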
struct dsa_switch_tree {
struct list_head list;
@@ -128,6 +187,7 @@ struct dsa_port {
struct dsa_switch_tree *dst;
struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt);
+ bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
enum {
DSA_PORT_TYPE_UNUSED = 0,
@@ -140,12 +200,24 @@ struct dsa_port {
unsigned int index;
const char *name;
const struct dsa_port *cpu_dp;
+ const char *mac;
struct device_node *dn;
unsigned int ageing_time;
+ bool vlan_filtering;
u8 stp_state;
struct net_device *bridge_dev;
struct devlink_port devlink_port;
struct phylink *pl;
+
+ struct work_struct xmit_work;
+ struct sk_buff_head xmit_queue;
+
+ /*
+ * Give the switch driver somewhere to hang its per-port private data
+ * structures (accessible from the tagger).
+ */
+ void *priv;
+
/*
* Original copy of the master netdev ethtool_ops
*/
@@ -208,6 +280,16 @@ struct dsa_switch {
/* Number of switch port queues */
unsigned int num_tx_queues;
+ /* Disallow bridge core from requesting different VLAN awareness
+ * settings on ports if not hardware-supported
+ */
+ bool vlan_filtering_is_global;
+
+ /* In case vlan_filtering_is_global is set, the VLAN awareness state
+ * should be retrieved from here and not from the per-port settings.
+ */
+ bool vlan_filtering;
+
unsigned long *bitmap;
unsigned long _bitmap;
@@ -275,18 +357,19 @@ static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port)
return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index);
}
+static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
+{
+ const struct dsa_switch *ds = dp->ds;
+
+ if (ds->vlan_filtering_is_global)
+ return ds->vlan_filtering;
+ else
+ return dp->vlan_filtering;
+}
+
typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
bool is_static, void *data);
struct dsa_switch_ops {
-#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
- /*
- * Legacy probing.
- */
- const char *(*probe)(struct device *dsa_dev,
- struct device *host_dev, int sw_addr,
- void **priv);
-#endif
-
enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds,
int port);
@@ -469,6 +552,12 @@ struct dsa_switch_ops {
struct sk_buff *clone, unsigned int type);
bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
struct sk_buff *skb, unsigned int type);
+
+ /*
+ * Deferred frame Tx
+ */
+ netdev_tx_t (*port_deferred_xmit)(struct dsa_switch *ds, int port,
+ struct sk_buff *skb);
};
struct dsa_switch_driver {
@@ -476,20 +565,6 @@ struct dsa_switch_driver {
const struct dsa_switch_ops *ops;
};
-#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
-/* Legacy driver registration */
-void register_switch_driver(struct dsa_switch_driver *type);
-void unregister_switch_driver(struct dsa_switch_driver *type);
-struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
-
-#else
-static inline void register_switch_driver(struct dsa_switch_driver *type) { }
-static inline void unregister_switch_driver(struct dsa_switch_driver *type) { }
-static inline struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
-{
- return NULL;
-}
-#endif
struct net_device *dsa_dev_to_net_device(struct device *dev);
/* Keep inline for faster access in hot path */
@@ -501,6 +576,15 @@ static inline bool netdev_uses_dsa(struct net_device *dev)
return false;
}
+static inline bool dsa_can_decode(const struct sk_buff *skb,
+ struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_NET_DSA)
+ return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev);
+#endif
+ return false;
+}
+
struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n);
void dsa_unregister_switch(struct dsa_switch *ds);
int dsa_register_switch(struct dsa_switch *ds);
@@ -569,9 +653,76 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
+netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data);
int dsa_port_get_phy_sset_count(struct dsa_port *dp);
void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
+struct dsa_tag_driver {
+ const struct dsa_device_ops *ops;
+ struct list_head list;
+ struct module *owner;
+};
+
+void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
+ unsigned int count,
+ struct module *owner);
+void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
+ unsigned int count);
+
+#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \
+static int __init dsa_tag_driver_module_init(void) \
+{ \
+ dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \
+ THIS_MODULE); \
+ return 0; \
+} \
+module_init(dsa_tag_driver_module_init); \
+ \
+static void __exit dsa_tag_driver_module_exit(void) \
+{ \
+ dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \
+} \
+module_exit(dsa_tag_driver_module_exit)
+
+/**
+ * module_dsa_tag_drivers() - Helper macro for registering DSA tag
+ * drivers
+ * @__ops_array: Array of tag driver structures
+ *
+ * Helper macro for DSA tag drivers which do not do anything special
+ * in module init/exit. Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
+ */
+#define module_dsa_tag_drivers(__ops_array) \
+dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array))
+
+#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops
+
+/* Create a static structure that we can use to build a linked list of
+ * dsa_tag drivers
+ */
+#define DSA_TAG_DRIVER(__ops) \
+static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \
+ .ops = &__ops, \
+}
+
+/**
+ * module_dsa_tag_driver() - Helper macro for registering a single DSA tag
+ * driver
+ * @__ops: Single tag driver structure
+ *
+ * Helper macro for DSA tag drivers which do not do anything special
+ * in module init/exit. Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
+ */
+#define module_dsa_tag_driver(__ops) \
+DSA_TAG_DRIVER(__ops); \
+ \
+static struct dsa_tag_driver *dsa_tag_driver_array[] = { \
+ &DSA_TAG_DRIVER_NAME(__ops) \
+}; \
+module_dsa_tag_drivers(dsa_tag_driver_array)
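Putting the pieces together, a minimal modular tag driver might look roughly like the sketch below; the "foo" tagger, its 4-byte trailer, and both callbacks are purely hypothetical.

static struct sk_buff *foo_tag_xmit(struct sk_buff *skb,
				    struct net_device *dev)
{
	/* append the hypothetical 4-byte trailer here */
	return skb;
}

static struct sk_buff *foo_tag_rcv(struct sk_buff *skb, struct net_device *dev,
				   struct packet_type *pt)
{
	/* strip the trailer and steer the frame to the right slave netdev */
	return skb;
}

static const struct dsa_device_ops foo_netdev_ops = {
	.name		= "foo",
	.proto		= DSA_TAG_PROTO_TRAILER,
	.xmit		= foo_tag_xmit,
	.rcv		= foo_tag_rcv,
	.overhead	= 4,
};

MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_TRAILER);

module_dsa_tag_driver(foo_netdev_ops);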
#endif
+
diff --git a/include/net/dst.h b/include/net/dst.h
index 6cf0870414c7..12b31c602cb0 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -19,17 +19,6 @@
#include <net/neighbour.h>
#include <asm/processor.h>
-#define DST_GC_MIN (HZ/10)
-#define DST_GC_INC (HZ/2)
-#define DST_GC_MAX (120*HZ)
-
-/* Each dst_entry has reference count and sits in some parent list(s).
- * When it is removed from parent list, it is "freed" (dst_free).
- * After this it enters dead state (dst->obsolete > 0) and if its refcnt
- * is zero, it can be destroyed immediately, otherwise it is added
- * to gc list and garbage collector periodically checks the refcnt.
- */
-
struct sk_buff;
struct dst_entry {
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
index c91ec732afd6..c49d7bfb5c30 100644
--- a/include/net/fib_notifier.h
+++ b/include/net/fib_notifier.h
@@ -2,10 +2,11 @@
#define __NET_FIB_NOTIFIER_H
#include <linux/types.h>
-#include <linux/module.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>
+struct module;
+
struct fib_notifier_info {
struct net *net;
int family;
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2b26979efb48..7c5a8d9a8d2a 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
return ((char *)target_container) + flow_dissector->offset[key_id];
}
+struct bpf_flow_dissector {
+ struct bpf_flow_keys *flow_keys;
+ const struct sk_buff *skb;
+ void *data;
+ void *data_end;
+};
+
#endif
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index d035183c8d03..6200900434e1 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -118,6 +118,8 @@ enum flow_action_id {
FLOW_ACTION_MARK,
FLOW_ACTION_WAKE,
FLOW_ACTION_QUEUE,
+ FLOW_ACTION_SAMPLE,
+ FLOW_ACTION_POLICE,
};
/* This is mirroring enum pedit_header_type definition for easy mapping between
@@ -157,6 +159,16 @@ struct flow_action_entry {
u32 index;
u8 vf;
} queue;
+ struct { /* FLOW_ACTION_SAMPLE */
+ struct psample_group *psample_group;
+ u32 rate;
+ u32 trunc_size;
+ bool truncate;
+ } sample;
+ struct { /* FLOW_ACTION_POLICE */
+ s64 burst;
+ u64 rate_bytes_ps;
+ } police;
};
};
@@ -170,6 +182,17 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
return action->num_entries;
}
+/**
+ * flow_offload_has_one_action() - check if exactly one action is present
+ * @action: tc filter flow offload action
+ *
+ * Returns true if exactly one action is present.
+ */
+static inline bool flow_offload_has_one_action(const struct flow_action *action)
+{
+ return action->num_entries == 1;
+}
+
#define flow_action_for_each(__i, __act, __actions) \
for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i])
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index be7c0fab3478..2caa86660ab0 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -107,21 +107,23 @@ begin:
return skb;
}
+static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
+{
+ u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
+
+ return reciprocal_scale(hash, fq->flows_cnt);
+}
+
static struct fq_flow *fq_flow_classify(struct fq *fq,
- struct fq_tin *tin,
+ struct fq_tin *tin, u32 idx,
struct sk_buff *skb,
fq_flow_get_default_t get_default_func)
{
struct fq_flow *flow;
- u32 hash;
- u32 idx;
lockdep_assert_held(&fq->lock);
- hash = skb_get_hash_perturb(skb, fq->perturbation);
- idx = reciprocal_scale(hash, fq->flows_cnt);
flow = &fq->flows[idx];
-
if (flow->tin && flow->tin != tin) {
flow = get_default_func(fq, tin, idx, skb);
tin->collisions++;
@@ -153,7 +155,7 @@ static void fq_recalc_backlog(struct fq *fq,
}
static void fq_tin_enqueue(struct fq *fq,
- struct fq_tin *tin,
+ struct fq_tin *tin, u32 idx,
struct sk_buff *skb,
fq_skb_free_t free_func,
fq_flow_get_default_t get_default_func)
@@ -163,7 +165,7 @@ static void fq_tin_enqueue(struct fq *fq,
lockdep_assert_held(&fq->lock);
- flow = fq_flow_classify(fq, tin, skb, get_default_func);
+ flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);
flow->tin = tin;
flow->backlog += skb->len;
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index aa2e5888f18d..9292f1c588b7 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -26,6 +26,7 @@ struct genl_info;
* @name: name of family
* @version: protocol version
* @maxattr: maximum number of attributes supported
+ * @policy: netlink policy
* @netnsok: set to true if the family can handle network
* namespaces and should be presented in all of them
* @parallel_ops: operations can be called in parallel and aren't
@@ -56,6 +57,7 @@ struct genl_family {
unsigned int maxattr;
bool netnsok;
bool parallel_ops;
+ const struct nla_policy *policy;
int (*pre_doit)(const struct genl_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
@@ -119,19 +121,23 @@ static inline int genl_err_attr(struct genl_info *info, int err,
return err;
}
+enum genl_validate_flags {
+ GENL_DONT_VALIDATE_STRICT = BIT(0),
+ GENL_DONT_VALIDATE_DUMP = BIT(1),
+ GENL_DONT_VALIDATE_DUMP_STRICT = BIT(2),
+};
+
/**
* struct genl_ops - generic netlink operations
* @cmd: command identifier
* @internal_flags: flags used by the family
* @flags: flags
- * @policy: attribute validation policy
* @doit: standard command callback
* @start: start callback for dumps
* @dumpit: callback for dumpers
* @done: completion callback for dumps
*/
struct genl_ops {
- const struct nla_policy *policy;
int (*doit)(struct sk_buff *skb,
struct genl_info *info);
int (*start)(struct netlink_callback *cb);
@@ -141,6 +147,7 @@ struct genl_ops {
u8 cmd;
u8 internal_flags;
u8 flags;
+ u8 validate;
};
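To illustrate the split (a sketch only; every FOO_* name is hypothetical): the policy now lives on the family, while an individual op may relax strict validation through the GENL_DONT_VALIDATE_* flags in .validate.

enum {
	FOO_ATTR_UNSPEC,
	FOO_ATTR_VAL,
	__FOO_ATTR_MAX,
};
#define FOO_ATTR_MAX (__FOO_ATTR_MAX - 1)

static const struct nla_policy foo_policy[FOO_ATTR_MAX + 1] = {
	[FOO_ATTR_VAL] = { .type = NLA_U32 },
};

static int foo_get_doit(struct sk_buff *skb, struct genl_info *info)
{
	return 0;
}

static const struct genl_ops foo_ops[] = {
	{
		.cmd		= 1,	/* hypothetical FOO_CMD_GET */
		.doit		= foo_get_doit,
		/* legacy command: opt out of strict validation */
		.validate	= GENL_DONT_VALIDATE_STRICT |
				  GENL_DONT_VALIDATE_DUMP,
	},
};

static struct genl_family foo_family __ro_after_init = {
	.name		= "foo",
	.version	= 1,
	.maxattr	= FOO_ATTR_MAX,
	.policy		= foo_policy,	/* per-family now, no longer per-op */
	.module		= THIS_MODULE,
	.ops		= foo_ops,
	.n_ops		= ARRAY_SIZE(foo_ops),
};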
int genl_register_family(struct genl_family *family);
@@ -165,6 +172,25 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr)
}
/**
+ * genlmsg_parse_deprecated - parse attributes of a genetlink message
+ * @nlh: netlink message header
+ * @family: genetlink message family
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ * @extack: extended ACK report struct
+ */
+static inline int genlmsg_parse_deprecated(const struct nlmsghdr *nlh,
+ const struct genl_family *family,
+ struct nlattr *tb[], int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype,
+ policy, NL_VALIDATE_LIBERAL, extack);
+}
+
+/**
* genlmsg_parse - parse attributes of a genetlink message
* @nlh: netlink message header
* @family: genetlink message family
@@ -179,8 +205,8 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh,
const struct nla_policy *policy,
struct netlink_ext_ack *extack)
{
- return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype,
- policy, extack);
+ return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype,
+ policy, NL_VALIDATE_STRICT, extack);
}
/**
diff --git a/include/net/geneve.h b/include/net/geneve.h
index fc6a7e0a874a..bced0b1d9fe4 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -4,6 +4,8 @@
#include <net/udp_tunnel.h>
+#define GENEVE_UDP_PORT 6081
+
/* Geneve Header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |Ver| Opt Len |O|C| Rsvd. | Protocol Type |
diff --git a/include/net/ife.h b/include/net/ife.h
index e117617e3c34..7e2538d8585b 100644
--- a/include/net/ife.h
+++ b/include/net/ife.h
@@ -4,7 +4,6 @@
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
-#include <linux/module.h>
#include <uapi/linux/ife.h>
#if IS_ENABLED(CONFIG_NET_IFE)
diff --git a/include/net/ip.h b/include/net/ip.h
index 583526aad1d0..2d3cce7c3e8a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -38,6 +38,10 @@
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
#define IPV4_MIN_MTU 68 /* RFC 791 */
+extern unsigned int sysctl_fib_sync_mem;
+extern unsigned int sysctl_fib_sync_mem_min;
+extern unsigned int sysctl_fib_sync_mem_max;
+
struct sock;
struct inet_skb_parm {
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 84097010237c..40105738e2f6 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -19,6 +19,7 @@
#include <linux/notifier.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/inetpeer.h>
#include <net/fib_notifier.h>
@@ -50,7 +51,8 @@ struct fib6_config {
u32 fc_protocol;
u16 fc_type; /* only 8 bits are used */
u16 fc_delete_all_nh : 1,
- __unused : 15;
+ fc_ignore_dev_down:1,
+ __unused : 14;
struct in6_addr fc_dst;
struct in6_addr fc_src;
@@ -124,13 +126,11 @@ struct rt6_exception {
#define FIB6_MAX_DEPTH 5
struct fib6_nh {
- struct in6_addr nh_gw;
- struct net_device *nh_dev;
- struct lwtunnel_state *nh_lwtstate;
+ struct fib_nh_common nh_common;
- unsigned int nh_flags;
- atomic_t nh_upper_bound;
- int nh_weight;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ unsigned long last_probe;
+#endif
};
struct fib6_info {
@@ -146,7 +146,7 @@ struct fib6_info {
struct list_head fib6_siblings;
unsigned int fib6_nsiblings;
- atomic_t fib6_ref;
+ refcount_t fib6_ref;
unsigned long expires;
struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
@@ -159,10 +159,6 @@ struct fib6_info {
struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-#ifdef CONFIG_IPV6_ROUTER_PREF
- unsigned long last_probe;
-#endif
-
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
@@ -194,6 +190,14 @@ struct rt6_info {
unsigned short rt6i_nfheader_len;
};
+struct fib6_result {
+ struct fib6_nh *nh;
+ struct fib6_info *f6i;
+ u32 fib6_flags;
+ u8 fib6_type;
+ struct rt6_info *rt6;
+};
+
#define for_each_fib6_node_rt_rcu(fn) \
for (rt = rcu_dereference((fn)->leaf); rt; \
rt = rcu_dereference(rt->fib6_next))
@@ -281,17 +285,17 @@ void fib6_info_destroy_rcu(struct rcu_head *head);
static inline void fib6_info_hold(struct fib6_info *f6i)
{
- atomic_inc(&f6i->fib6_ref);
+ refcount_inc(&f6i->fib6_ref);
}
static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
{
- return atomic_inc_not_zero(&f6i->fib6_ref);
+ return refcount_inc_not_zero(&f6i->fib6_ref);
}
static inline void fib6_info_release(struct fib6_info *f6i)
{
- if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
+ if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}
@@ -388,18 +392,17 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
/* called with rcu lock held; can return error pointer
* caller needs to select path
*/
-struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
- int flags);
+int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags);
/* called with rcu lock held; caller needs to select path */
-struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int strict);
-
-struct fib6_info *fib6_multipath_select(const struct net *net,
- struct fib6_info *match,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb, int strict);
+int fib6_table_lookup(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, struct fib6_result *res,
+ int strict);
+void fib6_select_path(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool have_oif_match,
+ const struct sk_buff *skb, int strict);
struct fib6_node *fib6_node_lookup(struct fib6_node *root,
const struct in6_addr *daddr,
const struct in6_addr *saddr);
@@ -440,14 +443,13 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
{
- return f6i->fib6_nh.nh_dev;
+ return f6i->fib6_nh.fib_nh_dev;
}
-static inline
-struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
-{
- return f6i->fib6_nh.nh_lwtstate;
-}
+int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+void fib6_nh_release(struct fib6_nh *fib6_nh);
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 7ab119936e69..4790beaa86e0 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -68,8 +68,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
- return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
- RTF_GATEWAY;
+	/* the RTF_ADDRCONF flag filters out RAs */
+ return !(f6i->fib6_flags & RTF_ADDRCONF) &&
+ f6i->fib6_nh.fib_nh_gw_family;
}
void ip6_route_input(struct sk_buff *skb);
@@ -181,7 +182,7 @@ int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
-void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
+void rt6_sync_up(struct net_device *dev, unsigned char nh_flags);
void rt6_disable_ip(struct net_device *dev, unsigned long event);
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
void rt6_multipath_rebalance(struct fib6_info *f6i);
@@ -274,9 +275,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
- return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
- ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) &&
- !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
+ struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh;
+
+ return nha->fib_nh_dev == nhb->fib_nh_dev &&
+ ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
+ !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
}
static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@ -300,8 +303,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
-u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
- struct in6_addr *saddr);
+u32 ip6_mtu_from_fib6(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb,
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9c8214d2116d..d0e28f4ab099 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -32,10 +32,14 @@ struct fib_config {
u8 fc_protocol;
u8 fc_scope;
u8 fc_type;
- /* 3 bytes unused */
+ u8 fc_gw_family;
+ /* 2 bytes unused */
u32 fc_table;
__be32 fc_dst;
- __be32 fc_gw;
+ union {
+ __be32 fc_gw4;
+ struct in6_addr fc_gw6;
+ };
int fc_oif;
u32 fc_flags;
u32 fc_priority;
@@ -76,27 +80,49 @@ struct fnhe_hash_bucket {
#define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT)
#define FNHE_RECLAIM_DEPTH 5
+struct fib_nh_common {
+ struct net_device *nhc_dev;
+ int nhc_oif;
+ unsigned char nhc_scope;
+ u8 nhc_family;
+ u8 nhc_gw_family;
+ unsigned char nhc_flags;
+ struct lwtunnel_state *nhc_lwtstate;
+
+ union {
+ __be32 ipv4;
+ struct in6_addr ipv6;
+ } nhc_gw;
+
+ int nhc_weight;
+ atomic_t nhc_upper_bound;
+
+ /* v4 specific, but allows fib6_nh with v4 routes */
+ struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
+ struct rtable __rcu *nhc_rth_input;
+ struct fnhe_hash_bucket __rcu *nhc_exceptions;
+};
+
struct fib_nh {
- struct net_device *nh_dev;
+ struct fib_nh_common nh_common;
struct hlist_node nh_hash;
struct fib_info *nh_parent;
- unsigned int nh_flags;
- unsigned char nh_scope;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int nh_weight;
- atomic_t nh_upper_bound;
-#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 nh_tclassid;
#endif
- int nh_oif;
- __be32 nh_gw;
__be32 nh_saddr;
int nh_saddr_genid;
- struct rtable __rcu * __percpu *nh_pcpu_rth_output;
- struct rtable __rcu *nh_rth_input;
- struct fnhe_hash_bucket __rcu *nh_exceptions;
- struct lwtunnel_state *nh_lwtstate;
+#define fib_nh_family nh_common.nhc_family
+#define fib_nh_dev nh_common.nhc_dev
+#define fib_nh_oif nh_common.nhc_oif
+#define fib_nh_flags nh_common.nhc_flags
+#define fib_nh_lws nh_common.nhc_lwtstate
+#define fib_nh_scope nh_common.nhc_scope
+#define fib_nh_gw_family nh_common.nhc_gw_family
+#define fib_nh_gw4 nh_common.nhc_gw.ipv4
+#define fib_nh_gw6 nh_common.nhc_gw.ipv6
+#define fib_nh_weight nh_common.nhc_weight
+#define fib_nh_upper_bound nh_common.nhc_upper_bound
};
/*
@@ -123,9 +149,10 @@ struct fib_info {
#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
int fib_nhs;
+ bool fib_nh_is_v6;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
-#define fib_dev fib_nh[0].nh_dev
+#define fib_dev fib_nh[0].fib_nh_dev
};
@@ -135,15 +162,16 @@ struct fib_rule;
struct fib_table;
struct fib_result {
- __be32 prefix;
- unsigned char prefixlen;
- unsigned char nh_sel;
- unsigned char type;
- unsigned char scope;
- u32 tclassid;
- struct fib_info *fi;
- struct fib_table *table;
- struct hlist_head *fa_head;
+ __be32 prefix;
+ unsigned char prefixlen;
+ unsigned char nh_sel;
+ unsigned char type;
+ unsigned char scope;
+ u32 tclassid;
+ struct fib_nh_common *nhc;
+ struct fib_info *fi;
+ struct fib_table *table;
+ struct hlist_head *fa_head;
};
struct fib_result_nl {
@@ -161,11 +189,10 @@ struct fib_result_nl {
int err;
};
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-#define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel])
-#else /* CONFIG_IP_ROUTE_MULTIPATH */
-#define FIB_RES_NH(res) ((res).fi->fib_nh[0])
-#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
+{
+ return &fi->fib_nh[nhsel].nh_common;
+}
#ifdef CONFIG_IP_MULTIPLE_TABLES
#define FIB_TABLE_HASHSZ 256
@@ -174,18 +201,11 @@ struct fib_result_nl {
#endif
__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
+__be32 fib_result_prefsrc(struct net *net, struct fib_result *res);
-#define FIB_RES_SADDR(net, res) \
- ((FIB_RES_NH(res).nh_saddr_genid == \
- atomic_read(&(net)->ipv4.dev_addr_genid)) ? \
- FIB_RES_NH(res).nh_saddr : \
- fib_info_update_nh_saddr((net), &FIB_RES_NH(res)))
-#define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw)
-#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev)
-#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif)
-
-#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \
- FIB_RES_SADDR(net, res))
+#define FIB_RES_NHC(res) ((res).nhc)
+#define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev)
+#define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif)
struct fib_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
@@ -383,6 +403,8 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
/* Exported by fib_frontend.c */
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
+ struct netlink_ext_ack *extack);
__be32 fib_compute_spec_dst(struct sk_buff *skb);
bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
@@ -405,7 +427,7 @@ int fib_unmerge(struct net *net);
int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
int fib_sync_down_addr(struct net_device *dev, __be32 local);
-int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -416,6 +438,15 @@ void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb);
+int fib_nh_init(struct net *net, struct fib_nh *fib_nh,
+ struct fib_config *cfg, int nh_weight,
+ struct netlink_ext_ack *extack);
+void fib_nh_release(struct net *net, struct fib_nh *fib_nh);
+int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap,
+ u16 fc_encap_type, void *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+void fib_nh_common_release(struct fib_nh_common *nhc);
+
/* Exported by fib_trie.c */
void fib_trie_init(void);
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);
@@ -423,10 +454,12 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);
static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
+ struct fib_nh_common *nhc = res->nhc;
+ struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_MULTIPLE_TABLES
u32 rtag;
#endif
- *itag = FIB_RES_NH(*res).nh_tclassid<<16;
+ *itag = nh->nh_tclassid << 16;
#ifdef CONFIG_IP_MULTIPLE_TABLES
rtag = res->tclassid;
if (*itag == 0)
@@ -467,4 +500,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct fib_dump_filter *filter,
struct netlink_callback *cb);
+
+int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh,
+ unsigned char *flags, bool skip_oif);
+int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh,
+ int nh_weight);
#endif /* _NET_FIB_H */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 047f9a5ccaad..2ac40135b576 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -600,6 +600,9 @@ struct ip_vs_dest_user_kern {
/* Address family of addr */
u16 af;
+
+ u16 tun_type; /* tunnel type */
+ __be16 tun_port; /* tunnel port */
};
@@ -660,6 +663,8 @@ struct ip_vs_dest {
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
atomic_t last_weight; /* server latest weight */
+ __u16 tun_type; /* tunnel type */
+ __be16 tun_port; /* tunnel port */
refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 28aa9b30aece..1f77fb4dc79d 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -94,7 +94,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
goto out;
head->dev = dev;
- skb_get(head);
spin_unlock(&fq->q.lock);
icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
new file mode 100644
index 000000000000..6c0c4fde16f8
--- /dev/null
+++ b/include/net/ipv6_stubs.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _IPV6_STUBS_H
+#define _IPV6_STUBS_H
+
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/neighbour.h>
+#include <net/sock.h>
+
+/* structs from net/ip6_fib.h */
+struct fib6_info;
+struct fib6_nh;
+struct fib6_config;
+struct fib6_result;
+
+/* This is ugly; ideally these symbols should be built
+ * into the core kernel.
+ */
+struct ipv6_stub {
+ int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex,
+ const struct in6_addr *addr);
+ int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
+ const struct in6_addr *addr);
+ int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
+ struct dst_entry **dst, struct flowi6 *fl6);
+ int (*ipv6_route_input)(struct sk_buff *skb);
+
+ struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
+ int (*fib6_lookup)(struct net *net, int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags);
+ int (*fib6_table_lookup)(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6,
+ struct fib6_result *res, int flags);
+ void (*fib6_select_path)(const struct net *net, struct fib6_result *res,
+ struct flowi6 *fl6, int oif, bool oif_match,
+ const struct sk_buff *skb, int strict);
+ u32 (*ip6_mtu_from_fib6)(const struct fib6_result *res,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
+
+ int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+ void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+ void (*udpv6_encap_enable)(void);
+ void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
+ const struct in6_addr *solicited_addr,
+ bool router, bool solicited, bool override, bool inc_opt);
+ struct neigh_table *nd_tbl;
+};
+extern const struct ipv6_stub *ipv6_stub __read_mostly;
+
+/* A stub used by bpf helpers. Just as ugly as ipv6_stub. */
+struct ipv6_bpf_stub {
+ int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock);
+ struct sock *(*udp6_lib_lookup)(struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, __be16 dport,
+ int dif, int sdif, struct udp_table *tbl,
+ struct sk_buff *skb);
+};
+extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+
+#endif
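A short usage sketch (illustrative; foo_output_route is hypothetical): code outside the IPv6 core reaches IPv6 routing through the stub, which the ipv6 module fills in at load time; the default handlers are expected to fail with -EAFNOSUPPORT when IPv6 is unavailable.

static int foo_output_route(struct net *net, struct sock *sk,
			    struct dst_entry **dst, struct flowi6 *fl6)
{
	return ipv6_stub->ipv6_dst_lookup(net, sk, dst, fl6);
}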
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 671113bcb2cc..5d6c5b1fc695 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -118,8 +118,8 @@ int lwtunnel_build_state(u16 encap_type,
unsigned int family, const void *cfg,
struct lwtunnel_state **lws,
struct netlink_ext_ack *extack);
-int lwtunnel_fill_encap(struct sk_buff *skb,
- struct lwtunnel_state *lwtstate);
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
+ int encap_attr, int encap_type_attr);
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
@@ -219,7 +219,8 @@ static inline int lwtunnel_build_state(u16 encap_type,
}
static inline int lwtunnel_fill_encap(struct sk_buff *skb,
- struct lwtunnel_state *lwtstate)
+ struct lwtunnel_state *lwtstate,
+ int encap_attr, int encap_type_attr)
{
return 0;
}
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 112dc18c658f..72080d9d617e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -807,6 +807,7 @@ enum mac80211_tx_info_flags {
* @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information
* @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
* @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path
+ * @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup
*
* These flags are used in tx_info->control.flags.
*/
@@ -816,6 +817,7 @@ enum mac80211_tx_control_flags {
IEEE80211_TX_CTRL_RATE_INJECT = BIT(2),
IEEE80211_TX_CTRL_AMSDU = BIT(3),
IEEE80211_TX_CTRL_FAST_XMIT = BIT(4),
+ IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5),
};
/*
@@ -1697,6 +1699,7 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
* @IEEE80211_KEY_FLAG_PUT_MIC_SPACE: This flag should be set by the driver for
* a TKIP key if it only requires MIC space. Do not set together with
* @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key.
+ * @IEEE80211_KEY_FLAG_NO_AUTO_TX: Key needs explicit Tx activation.
*/
enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0),
@@ -1708,6 +1711,7 @@ enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_RX_MGMT = BIT(6),
IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7),
IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8),
+ IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9),
};
/**
@@ -1888,6 +1892,24 @@ struct ieee80211_sta_rates {
};
/**
+ * struct ieee80211_sta_txpwr - station txpower configuration
+ *
+ * Used to configure txpower for station.
+ *
+ * @power: indicates the tx power, in dBm, to be used when sending data frames
+ * to the STA.
+ * @type: In particular, if TPC %type is NL80211_TX_POWER_LIMITED then the tx power
+ *	will be less than or equal to the value specified from userspace, whereas if
+ *	TPC %type is NL80211_TX_POWER_AUTOMATIC then it indicates the default tx power.
+ * NL80211_TX_POWER_FIXED is not a valid configuration option for
+ * per peer TPC.
+ */
+struct ieee80211_sta_txpwr {
+ s16 power;
+ enum nl80211_tx_power_setting type;
+};
+
+/**
* struct ieee80211_sta - station table entry
*
* A station table entry represents a station we are possibly
@@ -1973,6 +1995,7 @@ struct ieee80211_sta {
bool support_p2p_ps;
u16 max_rc_amsdu_len;
u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS];
+ struct ieee80211_sta_txpwr txpwr;
struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1];
@@ -2243,6 +2266,9 @@ struct ieee80211_txq {
* @IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID: Hardware supports multi BSSID
* only for HE APs. Applies if @IEEE80211_HW_SUPPORTS_MULTI_BSSID is set.
*
+ * @IEEE80211_HW_EXT_KEY_ID_NATIVE: Driver and hardware support Extended
+ *	Key ID and can handle two unicast keys per station for Rx and Tx.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2294,6 +2320,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN,
IEEE80211_HW_SUPPORTS_MULTI_BSSID,
IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
+ IEEE80211_HW_EXT_KEY_ID_NATIVE,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -3794,6 +3821,9 @@ struct ieee80211_ops {
#endif
void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
enum sta_notify_cmd, struct ieee80211_sta *sta);
+ int (*sta_set_txpwr)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta);
int (*sta_state)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
enum ieee80211_sta_state old_state,
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index ddfbb591e2c5..366150053043 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -2,6 +2,8 @@
#ifndef _NDISC_H
#define _NDISC_H
+#include <net/ipv6_stubs.h>
+
/*
* ICMP codes for neighbour discovery messages
*/
@@ -379,6 +381,14 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev
return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev);
}
+static inline
+struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev,
+ const void *pkey)
+{
+ return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
+ ndisc_hashfn, pkey, dev);
+}
+
static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey)
{
struct neighbour *n;
@@ -409,6 +419,36 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
rcu_read_unlock_bh();
}
+static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
+ const void *pkey)
+{
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+ n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
+ if (n) {
+ unsigned long now = jiffies;
+
+ /* avoid dirtying neighbour */
+ if (n->confirmed != now)
+ n->confirmed = now;
+ }
+ rcu_read_unlock_bh();
+}
+
+/* uses ipv6_stub and is meant for use outside of IPv6 core */
+static inline struct neighbour *ip_neigh_gw6(struct net_device *dev,
+ const void *addr)
+{
+ struct neighbour *neigh;
+
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, addr);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false);
+
+ return neigh;
+}
+
int ndisc_init(void);
int ndisc_late_init(void);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 7c1ab9edba03..50a67bd6a434 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -205,6 +205,8 @@ struct neigh_table {
int (*pconstructor)(struct pneigh_entry *);
void (*pdestructor)(struct pneigh_entry *);
void (*proxy_redo)(struct sk_buff *skb);
+ bool (*allow_add)(const struct net_device *dev,
+ struct netlink_ext_ack *extack);
char *id;
struct neigh_parms parms;
struct list_head parms_list;
@@ -498,11 +500,12 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
return dev_queue_xmit(skb);
}
-static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
+static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
+ bool skip_cache)
{
const struct hh_cache *hh = &n->hh;
- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
+ if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
return neigh_hh_output(hh, skb);
else
return n->output(n, skb);
diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h
deleted file mode 100644
index 13d55206bb9f..000000000000
--- a/include/net/netfilter/ipv4/nf_nat_masquerade.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NF_NAT_MASQUERADE_IPV4_H_
-#define _NF_NAT_MASQUERADE_IPV4_H_
-
-#include <net/netfilter/nf_nat.h>
-
-unsigned int
-nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
- const struct nf_nat_range2 *range,
- const struct net_device *out);
-
-int nf_nat_masquerade_ipv4_register_notifier(void);
-void nf_nat_masquerade_ipv4_unregister_notifier(void);
-
-#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */
diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h
deleted file mode 100644
index 2917bf95c437..000000000000
--- a/include/net/netfilter/ipv6/nf_nat_masquerade.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NF_NAT_MASQUERADE_IPV6_H_
-#define _NF_NAT_MASQUERADE_IPV6_H_
-
-unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
- const struct net_device *out);
-int nf_nat_masquerade_ipv6_register_notifier(void);
-void nf_nat_masquerade_ipv6_unregister_notifier(void);
-
-#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 006e430d1cdf..93ce6b0daaba 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -48,7 +48,7 @@ struct nf_conntrack_expect {
/* Expectation class */
unsigned int class;
-#ifdef CONFIG_NF_NAT_NEEDED
+#if IS_ENABLED(CONFIG_NF_NAT)
union nf_inet_addr saved_addr;
/* This is the original per-proto part, used to map the
* expected connection the way the recipient expects. */
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index ec52a8dc32fd..44b5a00a9c64 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -15,6 +15,11 @@
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#define NF_NAT_HELPER_PREFIX "ip_nat_"
+#define NF_NAT_HELPER_NAME(name) NF_NAT_HELPER_PREFIX name
+#define MODULE_ALIAS_NF_NAT_HELPER(name) \
+ MODULE_ALIAS(NF_NAT_HELPER_NAME(name))
+
struct module;
enum nf_ct_helper_flags {
@@ -54,6 +59,8 @@ struct nf_conntrack_helper {
unsigned int queue_num;
/* length of userspace private data stored in nf_conn_help->data */
u16 data_len;
+ /* name of NAT helper module */
+ char nat_mod_name[NF_CT_HELPER_NAME_LEN];
};
/* Must be kept in sync with the classes defined by helpers */
@@ -153,4 +160,21 @@ nf_ct_helper_expectfn_find_by_symbol(const void *symbol);
extern struct hlist_head *nf_ct_helper_hash;
extern unsigned int nf_ct_helper_hsize;
+struct nf_conntrack_nat_helper {
+ struct list_head list;
+ char mod_name[NF_CT_HELPER_NAME_LEN]; /* module name */
+ struct module *module; /* pointer to self */
+};
+
+#define NF_CT_NAT_HELPER_INIT(name) \
+ { \
+ .mod_name = NF_NAT_HELPER_NAME(name), \
+ .module = THIS_MODULE \
+ }
+
+void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat);
+void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat);
+int nf_nat_helper_try_module_get(const char *name, u16 l3num,
+ u8 protonum);
+void nf_nat_helper_put(struct nf_conntrack_helper *helper);
#endif /*_NF_CONNTRACK_HELPER_H*/
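
A rough sketch of how a NAT helper module adopts the new registration hooks (the module and helper names here are illustrative, not part of this header):

static struct nf_conntrack_nat_helper nat_helper_ftp =
	NF_CT_NAT_HELPER_INIT("ftp");

static int __init nf_nat_ftp_init(void)
{
	nf_nat_helper_register(&nat_helper_ftp);
	return 0;
}

static void __exit nf_nat_ftp_fini(void)
{
	nf_nat_helper_unregister(&nat_helper_ftp);
}

/* allows "ip_nat_ftp" to be loaded on demand via nf_nat_helper_try_module_get() */
MODULE_ALIAS_NF_NAT_HELPER("ftp");
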
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 3394d75e1c80..00a8fbb2d735 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -88,6 +88,9 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
int nf_conntrack_timeout_init(void);
void nf_conntrack_timeout_fini(void);
void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout);
+int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, u8 l3num, u8 l4num,
+ const char *timeout_name);
+void nf_ct_destroy_timeout(struct nf_conn *ct);
#else
static inline int nf_conntrack_timeout_init(void)
{
@@ -98,6 +101,18 @@ static inline void nf_conntrack_timeout_fini(void)
{
return;
}
+
+static inline int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
+ u8 l3num, u8 l4num,
+ const char *timeout_name)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void nf_ct_destroy_timeout(struct nf_conn *ct)
+{
+ return;
+}
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
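
A hedged sketch of the newly exported timeout helpers; the policy name is hypothetical and error handling is abbreviated:

	/* attach a named cttimeout policy to a tracked connection */
	err = nf_ct_set_timeout(net, ct, NFPROTO_IPV4, IPPROTO_TCP,
				"strict-tcp");
	if (err)
		return err;	/* e.g. -EOPNOTSUPP when the feature is compiled out */

	/* and drop the policy reference when the owner goes away */
	nf_ct_destroy_timeout(ct);
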
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index cf332c4e0b32..423cda2c6542 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -69,9 +69,9 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
#endif
}
-int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
+int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
const struct nf_hook_ops *nat_ops, unsigned int ops_count);
-void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
+void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
unsigned int ops_count);
unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
@@ -98,6 +98,9 @@ void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops);
void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
+int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops);
+void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
+
unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h
new file mode 100644
index 000000000000..54a14d643c34
--- /dev/null
+++ b/include/net/netfilter/nf_nat_masquerade.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NF_NAT_MASQUERADE_H_
+#define _NF_NAT_MASQUERADE_H_
+
+#include <net/netfilter/nf_nat.h>
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct nf_nat_range2 *range,
+ const struct net_device *out);
+
+int nf_nat_masquerade_inet_register_notifiers(void);
+void nf_nat_masquerade_inet_unregister_notifiers(void);
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+ const struct net_device *out);
+
+#endif /*_NF_NAT_MASQUERADE_H_ */
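
Sketch of a masquerade user with the merged header; a single registration call now covers the IPv4 and IPv6 notifiers (the module boilerplate below is assumed):

static int __init masq_init(void)
{
	return nf_nat_masquerade_inet_register_notifiers();
}

static void __exit masq_exit(void)
{
	nf_nat_masquerade_inet_unregister_notifiers();
}
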
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index a50a69f5334c..7239105d9d2e 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -119,4 +119,7 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
return queue;
}
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
#endif /* _NF_QUEUE_H */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3e9ab643eedf..5b8624ae4a27 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -2,7 +2,6 @@
#ifndef _NET_NF_TABLES_H
#define _NET_NF_TABLES_H
-#include <linux/module.h>
#include <linux/list.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
@@ -13,6 +12,8 @@
#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
+struct module;
+
#define NFT_JUMP_STACK_SIZE 16
struct nft_pktinfo {
@@ -475,8 +476,6 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
enum nft_trans_phase phase);
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding);
-void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_binding *binding, bool commit);
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
/**
@@ -808,23 +807,6 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr);
-static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
-{
- int err;
-
- if (src->ops->clone) {
- dst->ops = src->ops;
- err = src->ops->clone(dst, src);
- if (err < 0)
- return err;
- } else {
- memcpy(dst, src, src->ops->size);
- }
-
- __module_get(src->ops->type->owner);
- return 0;
-}
-
/**
* struct nft_rule - nf_tables rule
*
@@ -1411,4 +1393,6 @@ struct nft_trans_flowtable {
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
+void __init nft_chain_route_init(void);
+void nft_chain_route_fini(void);
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 23f27b0b3cef..395b4406f4b0 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -183,6 +183,7 @@ enum {
NLA_REJECT,
NLA_EXACT_LEN,
NLA_EXACT_LEN_WARN,
+ NLA_MIN_LEN,
__NLA_TYPE_MAX,
};
@@ -212,6 +213,7 @@ enum nla_policy_validation {
* NLA_NUL_STRING Maximum length of string (excluding NUL)
* NLA_FLAG Unused
* NLA_BINARY Maximum length of attribute payload
+ * NLA_MIN_LEN Minimum length of attribute payload
* NLA_NESTED,
* NLA_NESTED_ARRAY Length verification is done by checking len of
* nested header (or empty); len field is used if
@@ -230,6 +232,7 @@ enum nla_policy_validation {
* it is rejected.
* NLA_EXACT_LEN_WARN Attribute should have exactly this length, a warning
* is logged if it is longer, shorter is rejected.
+ * NLA_MIN_LEN Minimum length of attribute payload
* All other Minimum length of attribute payload
*
* Meaning of `validation_data' field:
@@ -281,7 +284,7 @@ enum nla_policy_validation {
* static const struct nla_policy my_policy[ATTR_MAX+1] = {
* [ATTR_FOO] = { .type = NLA_U16 },
* [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
- * [ATTR_BAZ] = { .len = sizeof(struct mystruct) },
+ * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) },
* [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },
* };
*/
@@ -296,12 +299,31 @@ struct nla_policy {
};
int (*validate)(const struct nlattr *attr,
struct netlink_ext_ack *extack);
+ /* This entry is special, and used for the attribute at index 0
+ * only, and specifies special data about the policy, namely it
+ * specifies the "boundary type" where strict length validation
+ * starts for any attribute types >= this value, also, strict
+ * nesting validation starts here.
+ *
+ * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT
+ * for any types >= this, so you need to use NLA_MIN_LEN to get the
+ * previous pure { .len = xyz } behaviour. The advantage of this
+ * is that types not specified in the policy will be rejected.
+ *
+ * For completely new families it should be set to 1 so that the
+ * validation is enforced for all attributes. For existing ones
+ * it should be set at least when new attributes are added to
+ * the enum used by the policy, and be set to the new value that
+ * was added to enforce strict validation from thereon.
+ */
+ u16 strict_start_type;
};
};
#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len }
#define NLA_POLICY_EXACT_LEN_WARN(_len) { .type = NLA_EXACT_LEN_WARN, \
.len = _len }
+#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len }
#define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN)
#define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
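
A sketch of how strict_start_type and NLA_POLICY_MIN_LEN() are meant to combine; the MYFAM_* enum and struct are hypothetical:

enum {
	MYFAM_ATTR_UNSPEC,
	MYFAM_ATTR_FOO,		/* pre-existing attribute */
	MYFAM_ATTR_NEW,		/* first attribute added after switching to strict mode */
	__MYFAM_ATTR_MAX,
};

static const struct nla_policy myfam_policy[__MYFAM_ATTR_MAX] = {
	/* attributes >= MYFAM_ATTR_NEW get full strict validation */
	[MYFAM_ATTR_UNSPEC]	= { .strict_start_type = MYFAM_ATTR_NEW },
	/* keeps the old "at least this long" semantics of { .len = x } */
	[MYFAM_ATTR_FOO]	= NLA_POLICY_MIN_LEN(sizeof(struct myfam_foo)),
	[MYFAM_ATTR_NEW]	= { .type = NLA_U32 },
};
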
@@ -365,21 +387,52 @@ struct nl_info {
bool skip_notify;
};
+/**
+ * enum netlink_validation - netlink message/attribute validation levels
+ * @NL_VALIDATE_LIBERAL: Old-style "be liberal" validation, not caring about
+ * extra data at the end of the message, attributes being longer than
+ * they should be, or unknown attributes being present.
+ * @NL_VALIDATE_TRAILING: Reject junk data encountered after attribute parsing.
+ * @NL_VALIDATE_MAXTYPE: Reject attributes > max type; Together with _TRAILING
+ * this is equivalent to the old nla_parse_strict()/nlmsg_parse_strict().
+ * @NL_VALIDATE_UNSPEC: Reject attributes with NLA_UNSPEC in the policy.
+ * This can safely be set by the kernel when the given policy has no
+ * NLA_UNSPEC anymore, and can thus be used to ensure policy entries
+ * are enforced going forward.
+ * @NL_VALIDATE_STRICT_ATTRS: strict attribute policy parsing (e.g.
+ * U8, U16, U32 must have exact size, etc.)
+ * @NL_VALIDATE_NESTED: Check that NLA_F_NESTED is set for NLA_NESTED(_ARRAY)
+ * and unset for other policies.
+ */
+enum netlink_validation {
+ NL_VALIDATE_LIBERAL = 0,
+ NL_VALIDATE_TRAILING = BIT(0),
+ NL_VALIDATE_MAXTYPE = BIT(1),
+ NL_VALIDATE_UNSPEC = BIT(2),
+ NL_VALIDATE_STRICT_ATTRS = BIT(3),
+ NL_VALIDATE_NESTED = BIT(4),
+};
+
+#define NL_VALIDATE_DEPRECATED_STRICT (NL_VALIDATE_TRAILING |\
+ NL_VALIDATE_MAXTYPE)
+#define NL_VALIDATE_STRICT (NL_VALIDATE_TRAILING |\
+ NL_VALIDATE_MAXTYPE |\
+ NL_VALIDATE_UNSPEC |\
+ NL_VALIDATE_STRICT_ATTRS |\
+ NL_VALIDATE_NESTED)
+
int netlink_rcv_skb(struct sk_buff *skb,
int (*cb)(struct sk_buff *, struct nlmsghdr *,
struct netlink_ext_ack *));
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
unsigned int group, int report, gfp_t flags);
-int nla_validate(const struct nlattr *head, int len, int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack);
-int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
- int len, const struct nla_policy *policy,
- struct netlink_ext_ack *extack);
-int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head,
- int len, const struct nla_policy *policy,
- struct netlink_ext_ack *extack);
+int __nla_validate(const struct nlattr *head, int len, int maxtype,
+ const struct nla_policy *policy, unsigned int validate,
+ struct netlink_ext_ack *extack);
+int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
+ int len, const struct nla_policy *policy, unsigned int validate,
+ struct netlink_ext_ack *extack);
int nla_policy_len(const struct nla_policy *, int);
struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype);
size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);
@@ -508,42 +561,167 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining)
}
/**
- * nlmsg_parse - parse attributes of a netlink message
+ * nla_parse - Parse a stream of attributes into a tb buffer
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @policy: validation policy
+ * @extack: extended ACK pointer
+ *
+ * Parses a stream of attributes and stores a pointer to each attribute in
+ * the tb array accessible via the attribute type. Attributes with a type
+ * exceeding maxtype will be rejected, policy must be specified, attributes
+ * will be validated in the strictest way possible.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static inline int nla_parse(struct nlattr **tb, int maxtype,
+ const struct nlattr *head, int len,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_parse(tb, maxtype, head, len, policy,
+ NL_VALIDATE_STRICT, extack);
+}
+
+/**
+ * nla_parse_deprecated - Parse a stream of attributes into a tb buffer
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @policy: validation policy
+ * @extack: extended ACK pointer
+ *
+ * Parses a stream of attributes and stores a pointer to each attribute in
+ * the tb array accessible via the attribute type. Attributes with a type
+ * exceeding maxtype will be ignored and attributes from the policy are not
+ * always strictly validated (only for new attributes).
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
+ const struct nlattr *head, int len,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_parse(tb, maxtype, head, len, policy,
+ NL_VALIDATE_LIBERAL, extack);
+}
+
+/**
+ * nla_parse_deprecated_strict - Parse a stream of attributes into a tb buffer
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @policy: validation policy
+ * @extack: extended ACK pointer
+ *
+ * Parses a stream of attributes and stores a pointer to each attribute in
+ * the tb array accessible via the attribute type. Attributes with a type
+ * exceeding maxtype will be rejected as well as trailing data, but the
+ * policy is not completely strictly validated (only for new attributes).
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype,
+ const struct nlattr *head,
+ int len,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_parse(tb, maxtype, head, len, policy,
+ NL_VALIDATE_DEPRECATED_STRICT, extack);
+}
+
+/**
+ * __nlmsg_parse - parse attributes of a netlink message
* @nlh: netlink message header
* @hdrlen: length of family specific header
* @tb: destination array with maxtype+1 elements
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
+ * @validate: validation strictness
* @extack: extended ACK report struct
*
* See nla_parse()
*/
-static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
- struct nlattr *tb[], int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
+ struct nlattr *tb[], int maxtype,
+ const struct nla_policy *policy,
+ unsigned int validate,
+ struct netlink_ext_ack *extack)
{
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) {
NL_SET_ERR_MSG(extack, "Invalid header length");
return -EINVAL;
}
- return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen), policy, extack);
+ return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
+ nlmsg_attrlen(nlh, hdrlen), policy, validate,
+ extack);
}
-static inline int nlmsg_parse_strict(const struct nlmsghdr *nlh, int hdrlen,
- struct nlattr *tb[], int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+/**
+ * nlmsg_parse - parse attributes of a netlink message
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ * @extack: extended ACK report struct
+ *
+ * See nla_parse()
+ */
+static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
+ struct nlattr *tb[], int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
{
- if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) {
- NL_SET_ERR_MSG(extack, "Invalid header length");
- return -EINVAL;
- }
+ return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
+ nlmsg_attrlen(nlh, hdrlen), policy,
+ NL_VALIDATE_STRICT, extack);
+}
- return nla_parse_strict(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen), policy, extack);
+/**
+ * nlmsg_parse_deprecated - parse attributes of a netlink message
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @extack: extended ACK report struct
+ *
+ * See nla_parse_deprecated()
+ */
+static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen,
+ struct nlattr *tb[], int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy,
+ NL_VALIDATE_LIBERAL, extack);
+}
+
+/**
+ * nlmsg_parse_deprecated_strict - parse attributes of a netlink message
+ * @nlh: netlink message header
+ * @hdrlen: length of family specific header
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @extack: extended ACK report struct
+ *
+ * See nla_parse_deprecated_strict()
+ */
+static inline int
+nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen,
+ struct nlattr *tb[], int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy,
+ NL_VALIDATE_DEPRECATED_STRICT, extack);
}
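
How a family picks among the renamed parsers — sketch only, the hdrlen and MYFAM_* names are assumptions:

	/* new command: reject unknown attributes, trailing data, etc. */
	err = nlmsg_parse(nlh, hdrlen, tb, __MYFAM_ATTR_MAX - 1,
			  myfam_policy, extack);

	/* command that predates strict validation: keep accepting what
	 * userspace has always been allowed to send
	 */
	err = nlmsg_parse_deprecated(nlh, hdrlen, tb, __MYFAM_ATTR_MAX - 1,
				     myfam_policy, extack);
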
/**
@@ -562,26 +740,75 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
}
/**
- * nlmsg_validate - validate a netlink message including attributes
+ * nla_validate_deprecated - Validate a stream of attributes
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ * @extack: extended ACK report struct
+ *
+ * Validates all attributes in the specified attribute stream against the
+ * specified policy. Validation is done in liberal mode.
+ * See documentation of struct nla_policy for more details.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static inline int nla_validate_deprecated(const struct nlattr *head, int len,
+ int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_LIBERAL,
+ extack);
+}
+
+/**
+ * nla_validate - Validate a stream of attributes
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ * @extack: extended ACK report struct
+ *
+ * Validates all attributes in the specified attribute stream against the
+ * specified policy. Validation is done in strict mode.
+ * See documentation of struct nla_policy for more details.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static inline int nla_validate(const struct nlattr *head, int len, int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_STRICT,
+ extack);
+}
+
+/**
+ * nlmsg_validate_deprecated - validate a netlink message including attributes
 * @nlh: netlink message header
 * @hdrlen: length of family specific header
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
* @extack: extended ACK report struct
*/
-static inline int nlmsg_validate(const struct nlmsghdr *nlh,
- int hdrlen, int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
+ int hdrlen, int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
{
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
return -EINVAL;
- return nla_validate(nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen), maxtype, policy,
- extack);
+ return __nla_validate(nlmsg_attrdata(nlh, hdrlen),
+ nlmsg_attrlen(nlh, hdrlen), maxtype,
+ policy, NL_VALIDATE_LIBERAL, extack);
}
+
+
/**
* nlmsg_report - need to report back to application?
* @nlh: netlink message header
@@ -909,8 +1136,32 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
const struct nla_policy *policy,
struct netlink_ext_ack *extack)
{
- return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy,
- extack);
+ if (!(nla->nla_type & NLA_F_NESTED)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla, "NLA_F_NESTED is missing");
+ return -EINVAL;
+ }
+
+ return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy,
+ NL_VALIDATE_STRICT, extack);
+}
+
+/**
+ * nla_parse_nested_deprecated - parse nested attributes
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @nla: attribute containing the nested attributes
+ * @policy: validation policy
+ * @extack: extended ACK report struct
+ *
+ * See nla_parse_deprecated()
+ */
+static inline int nla_parse_nested_deprecated(struct nlattr *tb[], int maxtype,
+ const struct nlattr *nla,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy,
+ NL_VALIDATE_LIBERAL, extack);
}
/**
@@ -1415,13 +1666,18 @@ static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp)
}
/**
- * nla_nest_start - Start a new level of nested attributes
+ * nla_nest_start_noflag - Start a new level of nested attributes
* @skb: socket buffer to add attributes to
* @attrtype: attribute type of container
*
- * Returns the container attribute
+ * This function exists for backward compatibility to use in APIs which never
+ * marked their nest attributes with the NLA_F_NESTED flag. New APIs should use
+ * nla_nest_start() which sets the flag.
+ *
+ * Returns the container attribute or NULL on error
*/
-static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
+static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb,
+ int attrtype)
{
struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
@@ -1432,6 +1688,21 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
}
/**
+ * nla_nest_start - Start a new level of nested attributes, with NLA_F_NESTED
+ * @skb: socket buffer to add attributes to
+ * @attrtype: attribute type of container
+ *
+ * Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED
+ * flag. This is the preferred function to use in new code.
+ *
+ * Returns the container attribute or NULL on error
+ */
+static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
+{
+ return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED);
+}
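
Dump-side sketch: new code uses nla_nest_start() so the container carries NLA_F_NESTED, which the strict parsers check for (attribute names are hypothetical):

	struct nlattr *nest;

	nest = nla_nest_start(skb, MYFAM_ATTR_STATS);	/* NLA_F_NESTED is set */
	if (!nest)
		goto nla_put_failure;
	if (nla_put_u32(skb, MYFAM_ATTR_STATS_PKTS, pkts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);
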
+
+/**
* nla_nest_end - Finalize nesting of attributes
* @skb: socket buffer the attributes are stored in
* @start: container attribute
@@ -1465,6 +1736,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
* @start: container attribute
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
+ * @validate: validation strictness
* @extack: extended ACK report struct
*
* Validates all attributes in the nested attribute stream against the
@@ -1473,12 +1745,22 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
*
* Returns 0 on success or a negative error code.
*/
-static inline int nla_validate_nested(const struct nlattr *start, int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+static inline int __nla_validate_nested(const struct nlattr *start, int maxtype,
+ const struct nla_policy *policy,
+ unsigned int validate,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_validate(nla_data(start), nla_len(start), maxtype, policy,
+ validate, extack);
+}
+
+static inline int
+nla_validate_nested_deprecated(const struct nlattr *start, int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
{
- return nla_validate(nla_data(start), nla_len(start), maxtype, policy,
- extack);
+ return __nla_validate_nested(start, maxtype, policy,
+ NL_VALIDATE_LIBERAL, extack);
}
/**
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index f19b53130bf7..806454e767bf 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -24,9 +24,9 @@ struct nf_generic_net {
struct nf_tcp_net {
unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
- unsigned int tcp_loose;
- unsigned int tcp_be_liberal;
- unsigned int tcp_max_retrans;
+ int tcp_loose;
+ int tcp_be_liberal;
+ int tcp_max_retrans;
};
enum udp_conntrack {
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 104a6669e344..7698460a3dd1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -9,6 +9,7 @@
#include <linux/uidgid.h>
#include <net/inet_frag.h>
#include <linux/rcupdate.h>
+#include <linux/siphash.h>
struct tcpm_hash_bucket;
struct ctl_table_header;
@@ -217,5 +218,6 @@ struct netns_ipv4 {
unsigned int ipmr_seq; /* protected by rtnl_mutex */
atomic_t rt_genid;
+ siphash_key_t ip_id_key;
};
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index b028a1dc150d..5e61b5a8635d 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -8,6 +8,7 @@
#ifndef __NETNS_IPV6_H__
#define __NETNS_IPV6_H__
#include <net/dst_ops.h>
+#include <uapi/linux/icmpv6.h>
struct ctl_table_header;
@@ -33,6 +34,10 @@ struct netns_sysctl_ipv6 {
int auto_flowlabels;
int icmpv6_time;
int icmpv6_echo_ignore_all;
+ int icmpv6_echo_ignore_multicast;
+ int icmpv6_echo_ignore_anycast;
+ DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1);
+ unsigned long *icmpv6_ratemask_ptr;
int anycast_src_echo_reply;
int ip_nonlocal_bind;
int fwmark_reflect;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index d5e7a1af346f..514e3c80ecc1 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -100,6 +100,11 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode);
#else
+static inline bool tcf_block_shared(struct tcf_block *block)
+{
+ return false;
+}
+
static inline
int tcf_block_get(struct tcf_block **p_block,
struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
@@ -372,30 +377,6 @@ static inline bool tcf_exts_has_actions(struct tcf_exts *exts)
}
/**
- * tcf_exts_has_one_action - check if exactly one action is present
- * @exts: tc filter extensions handle
- *
- * Returns true if exactly one action is present.
- */
-static inline bool tcf_exts_has_one_action(struct tcf_exts *exts)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return exts->nr_actions == 1;
-#else
- return false;
-#endif
-}
-
-static inline struct tc_action *tcf_exts_first_action(struct tcf_exts *exts)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return exts->actions[0];
-#else
- return NULL;
-#endif
-}
-
-/**
* tcf_exts_exec - execute tc filter extensions
* @skb: socket buffer
* @exts: tc filter extensions handle
@@ -784,12 +765,14 @@ tc_cls_flower_offload_flow_rule(struct tc_cls_flower_offload *tc_flow_cmd)
enum tc_matchall_command {
TC_CLSMATCHALL_REPLACE,
TC_CLSMATCHALL_DESTROY,
+ TC_CLSMATCHALL_STATS,
};
struct tc_cls_matchall_offload {
struct tc_cls_common_offload common;
enum tc_matchall_command command;
- struct tcf_exts *exts;
+ struct flow_rule *rule;
+ struct flow_stats stats;
unsigned long cookie;
};
diff --git a/include/net/psample.h b/include/net/psample.h
index 9b80f814ab04..37a4df2325b2 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -3,7 +3,6 @@
#define __NET_PSAMPLE_H
#include <uapi/linux/psample.h>
-#include <linux/module.h>
#include <linux/list.h>
struct psample_group {
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 21a5243fecd1..9dfd7960d90a 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -106,10 +106,8 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
return req;
}
-static inline void reqsk_free(struct request_sock *req)
+static inline void __reqsk_free(struct request_sock *req)
{
- WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
-
req->rsk_ops->destructor(req);
if (req->rsk_listener)
sock_put(req->rsk_listener);
@@ -117,6 +115,12 @@ static inline void reqsk_free(struct request_sock *req)
kmem_cache_free(req->rsk_ops->slab, req);
}
+static inline void reqsk_free(struct request_sock *req)
+{
+ WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
+ __reqsk_free(req);
+}
+
static inline void reqsk_put(struct request_sock *req)
{
if (refcount_dec_and_test(&req->rsk_refcnt))
diff --git a/include/net/route.h b/include/net/route.h
index 9883dc82f723..96f6c9ae33c2 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -29,6 +29,8 @@
#include <net/flow.h>
#include <net/inet_sock.h>
#include <net/ip_fib.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -55,12 +57,15 @@ struct rtable {
unsigned int rt_flags;
__u16 rt_type;
__u8 rt_is_input;
- __u8 rt_uses_gateway;
+ u8 rt_gw_family;
int rt_iif;
/* Info on neighbour */
- __be32 rt_gateway;
+ union {
+ __be32 rt_gw4;
+ struct in6_addr rt_gw6;
+ };
/* Miscellaneous cached information */
u32 rt_mtu_locked:1,
@@ -82,8 +87,8 @@ static inline bool rt_is_output_route(const struct rtable *rt)
static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
{
- if (rt->rt_gateway)
- return rt->rt_gateway;
+ if (rt->rt_gw_family == AF_INET)
+ return rt->rt_gw4;
return daddr;
}
@@ -347,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
return hoplimit;
}
+static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
+ __be32 daddr)
+{
+ struct neighbour *neigh;
+
+ neigh = __ipv4_neigh_lookup_noref(dev, daddr);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
+
+ return neigh;
+}
+
+static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
+ struct sk_buff *skb,
+ bool *is_v6gw)
+{
+ struct net_device *dev = rt->dst.dev;
+ struct neighbour *neigh;
+
+ if (likely(rt->rt_gw_family == AF_INET)) {
+ neigh = ip_neigh_gw4(dev, rt->rt_gw4);
+ } else if (rt->rt_gw_family == AF_INET6) {
+ neigh = ip_neigh_gw6(dev, &rt->rt_gw6);
+ *is_v6gw = true;
+ } else {
+ neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
+ }
+ return neigh;
+}
+
#endif /* _ROUTE_H */
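
The intended transmit-path usage, roughly along the lines of the IPv4 output path; a sketch assuming rt and skb are the route and packet being sent:

	bool is_v6gw = false;
	struct neighbour *neigh;

	rcu_read_lock_bh();
	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
	if (!IS_ERR(neigh)) {
		/* is_v6gw doubles as neigh_output()'s skip_cache argument:
		 * an IPv6 neighbour's cached header must not be reused for
		 * an IPv4 packet.
		 */
		int res = neigh_output(neigh, skb, is_v6gw);

		rcu_read_unlock_bh();
		return res;
	}
	rcu_read_unlock_bh();
	return -EINVAL;
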
diff --git a/include/net/nexthop.h b/include/net/rtnh.h
index 902ff382a6dc..aa2cfc508f7c 100644
--- a/include/net/nexthop.h
+++ b/include/net/rtnh.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NET_NEXTHOP_H
-#define __NET_NEXTHOP_H
+#ifndef __NET_RTNH_H
+#define __NET_RTNH_H
#include <linux/rtnetlink.h>
#include <net/netlink.h>
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a2b38b3deeca..21f434f3ac9e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -52,10 +52,7 @@ struct qdisc_size_table {
struct qdisc_skb_head {
struct sk_buff *head;
struct sk_buff *tail;
- union {
- u32 qlen;
- atomic_t atomic_qlen;
- };
+ __u32 qlen;
spinlock_t lock;
};
@@ -113,6 +110,9 @@ struct Qdisc {
spinlock_t busylock ____cacheline_aligned_in_smp;
spinlock_t seqlock;
+
+ /* for NOLOCK qdisc, true if there are no enqueued skbs */
+ bool empty;
struct rcu_head rcu;
};
@@ -143,11 +143,24 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}
+static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
+{
+ return q->flags & TCQ_F_CPUSTATS;
+}
+
+static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
+{
+ if (qdisc_is_percpu_stats(qdisc))
+ return qdisc->empty;
+ return !qdisc->q.qlen;
+}
+
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK) {
if (!spin_trylock(&qdisc->seqlock))
return false;
+ qdisc->empty = false;
} else if (qdisc_is_running(qdisc)) {
return false;
}
@@ -351,13 +364,10 @@ struct tcf_proto {
};
struct qdisc_skb_cb {
- union {
- struct {
- unsigned int pkt_len;
- u16 slave_dev_queue_mapping;
- u16 tc_classid;
- };
- struct bpf_flow_keys *flow_keys;
+ struct {
+ unsigned int pkt_len;
+ u16 slave_dev_queue_mapping;
+ u16 tc_classid;
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
@@ -470,19 +480,27 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+ return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
}
-static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
{
- u32 qlen = q->qstats.qlen;
+ __u32 qlen = q->qstats.qlen;
+ int i;
- if (q->flags & TCQ_F_NOLOCK)
- qlen += atomic_read(&q->q.atomic_qlen);
- else
+ if (qdisc_is_percpu_stats(q)) {
+ for_each_possible_cpu(i)
+ qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+ } else {
qlen += q->q.qlen;
+ }
return qlen;
}
@@ -736,7 +754,7 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
const struct Qdisc *q = rcu_dereference(txq->qdisc);
- if (q->q.qlen) {
+ if (!qdisc_is_empty(q)) {
rcu_read_unlock();
return false;
}
@@ -806,11 +824,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return sch->enqueue(skb, sch, to_free);
}
-static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
-{
- return q->flags & TCQ_F_CPUSTATS;
-}
-
static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
__u64 bytes, __u32 packets)
{
@@ -878,14 +891,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}
-static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
{
- atomic_inc(&sch->q.atomic_qlen);
+ this_cpu_inc(sch->cpu_qstats->qlen);
}
-static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
{
- atomic_dec(&sch->q.atomic_qlen);
+ this_cpu_dec(sch->cpu_qstats->qlen);
}
static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
@@ -1095,6 +1108,32 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
return skb;
}
+static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
+ struct sk_buff *skb)
+{
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_backlog_dec(sch, skb);
+ qdisc_bstats_cpu_update(sch, skb);
+ qdisc_qstats_cpu_qlen_dec(sch);
+ } else {
+ qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+ }
+}
+
+static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
+ unsigned int pkt_len)
+{
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_qlen_inc(sch);
+ this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
+ } else {
+ sch->qstats.backlog += pkt_len;
+ sch->q.qlen++;
+ }
+}
+
/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
@@ -1102,8 +1141,13 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
if (skb) {
skb = __skb_dequeue(&sch->gso_skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_backlog_dec(sch, skb);
+ qdisc_qstats_cpu_qlen_dec(sch);
+ } else {
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+ }
} else {
skb = sch->dequeue(sch);
}
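
Rough shape of a NOLOCK qdisc fast path with the per-CPU qlen helpers; the backend enqueue/dequeue functions are placeholders:

	/* enqueue */
	unsigned int pkt_len = qdisc_pkt_len(skb);	/* before the skb may be consumed */
	int err = my_backend_enqueue(skb, sch);
	if (err == NET_XMIT_SUCCESS)
		qdisc_update_stats_at_enqueue(sch, pkt_len);

	/* dequeue */
	skb = my_backend_dequeue(sch);
	if (skb)
		qdisc_update_stats_at_dequeue(sch, skb);
	else
		sch->empty = true;	/* lets qdisc_is_empty() short-circuit */
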
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 1d13ec3f2707..eefdfa5abf6e 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -421,7 +421,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
/*
* This mimics the behavior of skb_set_owner_r
*/
- sk->sk_forward_alloc -= event->rmem_len;
+ sk_mem_charge(sk, event->rmem_len);
}
/* Tests if the list has one and only one entry. */
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index bb0ecba3db2b..f4ac7117ff29 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -59,7 +59,7 @@ void sctp_ulpq_free(struct sctp_ulpq *);
int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
/* Add a new event for propagation to the ULP. */
-int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev);
+int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sk_buff_head *skb_list);
/* Renege previously received chunks. */
void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
diff --git a/include/net/sock.h b/include/net/sock.h
index 341f8bafa0cf..4d208c0f9c14 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -236,6 +236,8 @@ struct sock_common {
/* public: */
};
+struct bpf_sk_storage;
+
/**
* struct sock - network layer representation of sockets
* @__sk_common: shared layout with inet_timewait_sock
@@ -368,6 +370,7 @@ struct sock {
atomic_t sk_drops;
int sk_rcvlowat;
struct sk_buff_head sk_error_queue;
+ struct sk_buff *sk_rx_skb_cache;
struct sk_buff_head sk_receive_queue;
/*
* The backlog queue is special, it is always used with
@@ -414,6 +417,7 @@ struct sock {
struct sk_buff *sk_send_head;
struct rb_root tcp_rtx_queue;
};
+ struct sk_buff *sk_tx_skb_cache;
struct sk_buff_head sk_write_queue;
__s32 sk_peek_off;
int sk_write_pending;
@@ -508,6 +512,9 @@ struct sock {
#endif
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
+#ifdef CONFIG_BPF_SYSCALL
+ struct bpf_sk_storage __rcu *sk_bpf_storage;
+#endif
struct rcu_head sk_rcu;
};
@@ -966,7 +973,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
- if (static_key_false(&rfs_needed)) {
+ if (static_branch_unlikely(&rfs_needed)) {
/* Reading sk->sk_rxhash might incur an expensive cache line
* miss.
*
@@ -1466,6 +1473,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
+ if (!sk->sk_tx_skb_cache) {
+ skb_zcopy_clear(skb, true);
+ sk->sk_tx_skb_cache = skb;
+ return;
+ }
__kfree_skb(skb);
}
@@ -1607,6 +1619,8 @@ int sock_setsockopt(struct socket *sock, int level, int op,
int sock_getsockopt(struct socket *sock, int level, int op,
char __user *optval, int __user *optlen);
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+ bool timeval, bool time32);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode);
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
@@ -2427,6 +2441,15 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
+ if (
+#ifdef CONFIG_RPS
+ !static_branch_unlikely(&rps_needed) &&
+#endif
+ !sk->sk_rx_skb_cache) {
+ sk->sk_rx_skb_cache = skb;
+ skb_orphan(skb);
+ return;
+ }
__kfree_skb(skb);
}
@@ -2487,8 +2510,6 @@ static inline bool sk_listener(const struct sock *sk)
}
void sock_enable_timestamp(struct sock *sk, int flag);
-int sock_get_timestamp(struct sock *, struct timeval __user *);
-int sock_get_timestampns(struct sock *, struct timespec __user *);
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
int type);
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index 86d13b01b39d..c7f24a2da1ca 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -5,7 +5,8 @@
#include <net/act_api.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
-#include <linux/module.h>
+
+struct module;
struct tcf_ife_params {
u8 eth_dst[ETH_ALEN];
diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
new file mode 100644
index 000000000000..8b9ef3664262
--- /dev/null
+++ b/include/net/tc_act/tc_police.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_POLICE_H
+#define __NET_TC_POLICE_H
+
+#include <net/act_api.h>
+
+struct tcf_police_params {
+ int tcfp_result;
+ u32 tcfp_ewma_rate;
+ s64 tcfp_burst;
+ u32 tcfp_mtu;
+ s64 tcfp_mtu_ptoks;
+ struct psched_ratecfg rate;
+ bool rate_present;
+ struct psched_ratecfg peak;
+ bool peak_present;
+ struct rcu_head rcu;
+};
+
+struct tcf_police {
+ struct tc_action common;
+ struct tcf_police_params __rcu *params;
+
+ spinlock_t tcfp_lock ____cacheline_aligned_in_smp;
+ s64 tcfp_toks;
+ s64 tcfp_ptoks;
+ s64 tcfp_t_c;
+};
+
+#define to_police(pc) ((struct tcf_police *)pc)
+
+/* old policer structure from before tc actions */
+struct tc_police_compat {
+ u32 index;
+ int action;
+ u32 limit;
+ u32 burst;
+ u32 mtu;
+ struct tc_ratespec rate;
+ struct tc_ratespec peakrate;
+};
+
+static inline bool is_tcf_police(const struct tc_action *act)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (act->ops && act->ops->id == TCA_ID_POLICE)
+ return true;
+#endif
+ return false;
+}
+
+static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_bh(police->params);
+ return params->rate.rate_bytes_ps;
+}
+
+static inline s64 tcf_police_tcfp_burst(const struct tc_action *act)
+{
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *params;
+
+ params = rcu_dereference_bh(police->params);
+ return params->tcfp_burst;
+}
+
+#endif /* __NET_TC_POLICE_H */
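
Sketch of a driver inspecting a software police action before programming a hardware policer; the surrounding offload plumbing is assumed:

	if (is_tcf_police(act)) {
		u64 rate_bps;
		s64 burst;

		/* the parameters are RCU protected (rcu_dereference_bh) */
		rcu_read_lock_bh();
		rate_bps = tcf_police_rate_bytes_ps(act);
		burst = tcf_police_tcfp_burst(act);
		rcu_read_unlock_bh();

		/* convert rate_bps/burst into the device's policer units */
	}
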
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 68ee02523b87..7cf1181630a3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1314,7 +1314,7 @@ static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
static inline __sum16 tcp_v4_check(int len, __be32 saddr,
__be32 daddr, __wsum base)
{
- return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
+ return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
static inline bool tcp_checksum_complete(struct sk_buff *skb)
diff --git a/include/net/tls.h b/include/net/tls.h
index 5934246b2c6f..39ea62f0c1f6 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -60,6 +60,17 @@
#define TLS_AAD_SPACE_SIZE 13
#define TLS_DEVICE_NAME_MAX 32
+#define MAX_IV_SIZE 16
+
+/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes.
+ *
+ * IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3]
+ *
+ * The field 'length' is encoded in field 'b0' as '(length width - 1)'.
+ * Hence b0 contains (3 - 1) = 2.
+ */
+#define TLS_AES_CCM_IV_B0_BYTE 2
+
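
The layout described above corresponds roughly to the following assembly of the 16-byte IV buffer (sketch only; where the salt and record nonce come from is an assumption):

	u8 iv[MAX_IV_SIZE] = { 0 };

	iv[0] = TLS_AES_CCM_IV_B0_BYTE;		/* b0 = (length width) - 1 = 2 */
	memcpy(&iv[1], salt, 4);		/* implicit nonce (key salt) */
	memcpy(&iv[1 + 4], rec_seq, 8);		/* explicit per-record nonce */
	/* iv[13..15]: the 3-byte message length, filled in by the CCM code */
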
/*
* This structure defines the routines for Inline TLS driver.
* The following routines are optional and filled with a
@@ -123,8 +134,7 @@ struct tls_rec {
struct scatterlist sg_content_type;
char aad_space[TLS_AAD_SPACE_SIZE];
- u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE +
- TLS_CIPHER_AES_GCM_128_SALT_SIZE];
+ u8 iv_data[MAX_IV_SIZE];
struct aead_request aead_req;
u8 aead_req_ctx[];
};
@@ -219,6 +229,7 @@ struct tls_prot_info {
u16 tag_size;
u16 overhead_size;
u16 iv_size;
+ u16 salt_size;
u16 rec_seq_size;
u16 aad_size;
u16 tail_size;
@@ -266,6 +277,23 @@ struct tls_context {
void (*unhash)(struct sock *sk);
};
+enum tls_offload_ctx_dir {
+ TLS_OFFLOAD_CTX_DIR_RX,
+ TLS_OFFLOAD_CTX_DIR_TX,
+};
+
+struct tlsdev_ops {
+ int (*tls_dev_add)(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn);
+ void (*tls_dev_del)(struct net_device *netdev,
+ struct tls_context *ctx,
+ enum tls_offload_ctx_dir direction);
+ void (*tls_dev_resync_rx)(struct net_device *netdev,
+ struct sock *sk, u32 seq, u64 rcd_sn);
+};
+
struct tls_offload_context_rx {
/* sw must be the first member of tls_offload_context_rx */
struct tls_sw_context_rx sw;
@@ -306,7 +334,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
-void tls_device_sk_destruct(struct sock *sk);
void tls_device_free_resources_tx(struct sock *sk);
void tls_device_init(void);
void tls_device_cleanup(void);
@@ -325,7 +352,6 @@ static inline u32 tls_record_start_seq(struct tls_record_info *rec)
return rec->end_seq - rec->len;
}
-void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
int tls_push_sg(struct sock *sk, struct tls_context *ctx,
struct scatterlist *sg, u16 first_offset,
int flags);
@@ -536,7 +562,7 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
- atomic64_set(&rx_ctx->resync_req, ((((uint64_t)seq) << 32) | 1));
+ atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1);
}
diff --git a/include/net/udp.h b/include/net/udp.h
index fd6d948755c8..d8ce937bc395 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -269,13 +269,13 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *peeked, int *off, int *err);
+ int noblock, int *off, int *err);
static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
int noblock, int *err)
{
- int peeked, off = 0;
+ int off = 0;
- return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
+ return __skb_recv_udp(sk, flags, noblock, &off, err);
}
int udp_v4_early_demux(struct sk_buff *skb);
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index b8137953fea3..4b1f95e08307 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -7,7 +7,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
-#include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
#endif
struct udp_port_cfg {
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 00254a58824b..83b5999a2587 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -8,6 +8,8 @@
#include <net/rtnetlink.h>
#include <net/switchdev.h>
+#define IANA_VXLAN_UDP_PORT 4789
+
/* VXLAN protocol (RFC 7348) header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |R|R|R|R|I|R|R|R| Reserved |
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 99f722c4d804..a2907873ed56 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -132,6 +132,17 @@ struct xfrm_state_offload {
u8 flags;
};
+struct xfrm_mode {
+ u8 encap;
+ u8 family;
+ u8 flags;
+};
+
+/* Flags for xfrm_mode. */
+enum {
+ XFRM_MODE_FLAG_TUNNEL = 1,
+};
+
/* Full description of state of transformer. */
struct xfrm_state {
possible_net_t xs_net;
@@ -234,9 +245,9 @@ struct xfrm_state {
/* Reference to data common to all the instances of this
* transformer. */
const struct xfrm_type *type;
- struct xfrm_mode *inner_mode;
- struct xfrm_mode *inner_mode_iaf;
- struct xfrm_mode *outer_mode;
+ struct xfrm_mode inner_mode;
+ struct xfrm_mode inner_mode_iaf;
+ struct xfrm_mode outer_mode;
const struct xfrm_type_offload *type_offload;
@@ -316,13 +327,6 @@ struct xfrm_policy_afinfo {
xfrm_address_t *saddr,
xfrm_address_t *daddr,
u32 mark);
- void (*decode_session)(struct sk_buff *skb,
- struct flowi *fl,
- int reverse);
- int (*get_tos)(const struct flowi *fl);
- int (*init_path)(struct xfrm_dst *path,
- struct dst_entry *dst,
- int nfheader_len);
int (*fill_dst)(struct xfrm_dst *xdst,
struct net_device *dev,
const struct flowi *fl);
@@ -348,7 +352,6 @@ struct xfrm_state_afinfo {
struct module *owner;
const struct xfrm_type *type_map[IPPROTO_MAX];
const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX];
- struct xfrm_mode *mode_map[XFRM_MODE_MAX];
int (*init_flags)(struct xfrm_state *x);
void (*init_tempsel)(struct xfrm_selector *sel,
@@ -423,78 +426,6 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
-struct xfrm_mode {
- /*
- * Remove encapsulation header.
- *
- * The IP header will be moved over the top of the encapsulation
- * header.
- *
- * On entry, the transport header shall point to where the IP header
- * should be and the network header shall be set to where the IP
- * header currently is. skb->data shall point to the start of the
- * payload.
- */
- int (*input2)(struct xfrm_state *x, struct sk_buff *skb);
-
- /*
- * This is the actual input entry point.
- *
- * For transport mode and equivalent this would be identical to
- * input2 (which does not need to be set). While tunnel mode
- * and equivalent would set this to the tunnel encapsulation function
- * xfrm4_prepare_input that would in turn call input2.
- */
- int (*input)(struct xfrm_state *x, struct sk_buff *skb);
-
- /*
- * Add encapsulation header.
- *
- * On exit, the transport header will be set to the start of the
- * encapsulation header to be filled in by x->type->output and
- * the mac header will be set to the nextheader (protocol for
- * IPv4) field of the extension header directly preceding the
- * encapsulation header, or in its absence, that of the top IP
- * header. The value of the network header will always point
- * to the top IP header while skb->data will point to the payload.
- */
- int (*output2)(struct xfrm_state *x,struct sk_buff *skb);
-
- /*
- * This is the actual output entry point.
- *
- * For transport mode and equivalent this would be identical to
- * output2 (which does not need to be set). While tunnel mode
- * and equivalent would set this to a tunnel encapsulation function
- * (xfrm4_prepare_output or xfrm6_prepare_output) that would in turn
- * call output2.
- */
- int (*output)(struct xfrm_state *x, struct sk_buff *skb);
-
- /*
- * Adjust pointers into the packet and do GSO segmentation.
- */
- struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features);
-
- /*
- * Adjust pointers into the packet when IPsec is done at layer2.
- */
- void (*xmit)(struct xfrm_state *x, struct sk_buff *skb);
-
- struct xfrm_state_afinfo *afinfo;
- struct module *owner;
- unsigned int encap;
- int flags;
-};
-
-/* Flags for xfrm_mode. */
-enum {
- XFRM_MODE_FLAG_TUNNEL = 1,
-};
-
-int xfrm_register_mode(struct xfrm_mode *mode, int family);
-int xfrm_unregister_mode(struct xfrm_mode *mode, int family);
-
static inline int xfrm_af2proto(unsigned int family)
{
switch(family) {
@@ -507,13 +438,13 @@ static inline int xfrm_af2proto(unsigned int family)
}
}
-static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
+static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
{
if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) ||
(ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6))
- return x->inner_mode;
+ return &x->inner_mode;
else
- return x->inner_mode_iaf;
+ return &x->inner_mode_iaf;
}
struct xfrm_tmpl {
@@ -1623,7 +1554,6 @@ int xfrm_init_replay(struct xfrm_state *x);
int xfrm_state_mtu(struct xfrm_state *x, int mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
int xfrm_init_state(struct xfrm_state *x);
-int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
int xfrm_trans_queue(struct sk_buff *skb,
@@ -1631,7 +1561,11 @@ int xfrm_trans_queue(struct sk_buff *skb,
struct sk_buff *));
int xfrm_output_resume(struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
-int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
+
+#if IS_ENABLED(CONFIG_NET_PKTGEN)
+int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
+#endif
+
void xfrm_local_error(struct sk_buff *skb, int mtu);
int xfrm4_extract_header(struct sk_buff *skb);
int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
@@ -1650,10 +1584,8 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
}
int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb);
-int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol);
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
@@ -1669,7 +1601,6 @@ int xfrm6_rcv(struct sk_buff *skb);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto);
void xfrm6_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol);
int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol);
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
@@ -1677,7 +1608,6 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb);
int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@ -2069,7 +1999,7 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
tunnel = true;
break;
}
- if (tunnel && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL))
+ if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL))
return -EINVAL;
return 0;
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 505dae0bed80..d6e556c0a085 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \
* to make sure that if the tracepoint handling changes, the
* bpf probe will fail to compile unless it too is updated.
*/
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args) \
+#define __DEFINE_EVENT(template, call, proto, args, size) \
static inline void bpf_test_probe_##call(void) \
{ \
check_trace_callback_type_##call(__bpf_trace_##template); \
@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \
.bpf_func = (void *)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
+ .writable_size = size, \
};
+#define FIRST(x, ...) x
+
+#undef DEFINE_EVENT_WRITABLE
+#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
+static inline void bpf_test_buffer_##call(void) \
+{ \
+ /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
+ * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \
+ * dead-code-eliminated. \
+ */ \
+ FIRST(proto); \
+ (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \
+} \
+__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args) \
+ __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef DEFINE_EVENT_WRITABLE
+#undef __DEFINE_EVENT
+#undef FIRST
+
#endif /* CONFIG_BPF_EVENTS */
diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h
new file mode 100644
index 000000000000..265447e3f71a
--- /dev/null
+++ b/include/trace/events/bpf_test_run.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_test_run
+
+#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BPF_TEST_RUN_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(bpf_test_finish,
+
+ TP_PROTO(int *err),
+
+ TP_ARGS(err),
+
+ TP_STRUCT__entry(
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->err = *err;
+ ),
+
+ TP_printk("bpf_test_finish with err=%d", __entry->err)
+);
+
+#ifdef DEFINE_EVENT_WRITABLE
+#undef BPF_TEST_RUN_DEFINE_EVENT
+#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \
+ DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \
+ PARAMS(args), size)
+#else
+#undef BPF_TEST_RUN_DEFINE_EVENT
+#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \
+ DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
+#endif
+
+BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish,
+
+ TP_PROTO(int *err),
+
+ TP_ARGS(err),
+
+ sizeof(int)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index 6271bab63bfb..6f2a4dc35e37 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -13,9 +13,9 @@
TRACE_EVENT(fib_table_lookup,
TP_PROTO(u32 tb_id, const struct flowi4 *flp,
- const struct fib_nh *nh, int err),
+ const struct fib_nh_common *nhc, int err),
- TP_ARGS(tb_id, flp, nh, err),
+ TP_ARGS(tb_id, flp, nhc, err),
TP_STRUCT__entry(
__field( u32, tb_id )
@@ -28,14 +28,17 @@ TRACE_EVENT(fib_table_lookup,
__field( __u8, flags )
__array( __u8, src, 4 )
__array( __u8, dst, 4 )
- __array( __u8, gw, 4 )
- __array( __u8, saddr, 4 )
+ __array( __u8, gw4, 4 )
+ __array( __u8, gw6, 16 )
__field( u16, sport )
__field( u16, dport )
__dynamic_array(char, name, IFNAMSIZ )
),
TP_fast_assign(
+ struct in6_addr in6_zero = {};
+ struct net_device *dev;
+ struct in6_addr *in6;
__be32 *p32;
__entry->tb_id = tb_id;
@@ -62,30 +65,37 @@ TRACE_EVENT(fib_table_lookup,
__entry->dport = 0;
}
- if (nh) {
- p32 = (__be32 *) __entry->saddr;
- *p32 = nh->nh_saddr;
+ dev = nhc ? nhc->nhc_dev : NULL;
+ __assign_str(name, dev ? dev->name : "-");
- p32 = (__be32 *) __entry->gw;
- *p32 = nh->nh_gw;
+ if (nhc) {
+ if (nhc->nhc_gw_family == AF_INET) {
+ p32 = (__be32 *) __entry->gw4;
+ *p32 = nhc->nhc_gw.ipv4;
- __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-");
- } else {
- p32 = (__be32 *) __entry->saddr;
- *p32 = 0;
+ in6 = (struct in6_addr *)__entry->gw6;
+ *in6 = in6_zero;
+ } else if (nhc->nhc_gw_family == AF_INET6) {
+ p32 = (__be32 *) __entry->gw4;
+ *p32 = 0;
- p32 = (__be32 *) __entry->gw;
+ in6 = (struct in6_addr *)__entry->gw6;
+ *in6 = nhc->nhc_gw.ipv6;
+ }
+ } else {
+ p32 = (__be32 *) __entry->gw4;
*p32 = 0;
- __assign_str(name, "-");
+ in6 = (struct in6_addr *)__entry->gw6;
+ *in6 = in6_zero;
}
),
- TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d",
+ TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4/%pI6c err %d",
__entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
__entry->src, __entry->sport, __entry->dst, __entry->dport,
__entry->tos, __entry->scope, __entry->flags,
- __get_str(name), __entry->gw, __entry->saddr, __entry->err)
+ __get_str(name), __entry->gw4, __entry->gw6, __entry->err)
);
#endif /* _TRACE_FIB_H */
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index b088b54d699c..c6abdcc77c12 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -12,10 +12,10 @@
TRACE_EVENT(fib6_table_lookup,
- TP_PROTO(const struct net *net, const struct fib6_info *f6i,
+ TP_PROTO(const struct net *net, const struct fib6_result *res,
struct fib6_table *table, const struct flowi6 *flp),
- TP_ARGS(net, f6i, table, flp),
+ TP_ARGS(net, res, table, flp),
TP_STRUCT__entry(
__field( u32, tb_id )
@@ -39,7 +39,7 @@ TRACE_EVENT(fib6_table_lookup,
struct in6_addr *in6;
__entry->tb_id = table->tb6_id;
- __entry->err = ip6_rt_type_to_error(f6i->fib6_type);
+ __entry->err = ip6_rt_type_to_error(res->fib6_type);
__entry->oif = flp->flowi6_oif;
__entry->iif = flp->flowi6_iif;
__entry->tos = ip6_tclass(flp->flowlabel);
@@ -62,20 +62,20 @@ TRACE_EVENT(fib6_table_lookup,
__entry->dport = 0;
}
- if (f6i->fib6_nh.nh_dev) {
- __assign_str(name, f6i->fib6_nh.nh_dev);
+ if (res->nh && res->nh->fib_nh_dev) {
+ __assign_str(name, res->nh->fib_nh_dev);
} else {
__assign_str(name, "-");
}
- if (f6i == net->ipv6.fib6_null_entry) {
+ if (res->f6i == net->ipv6.fib6_null_entry) {
struct in6_addr in6_zero = {};
in6 = (struct in6_addr *)__entry->gw;
*in6 = in6_zero;
- } else if (f6i) {
+ } else if (res->nh) {
in6 = (struct in6_addr *)__entry->gw;
- *in6 = f6i->fib6_nh.nh_gw;
+ *in6 = res->nh->fib_nh_gw6;
}
),
diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h
index 6a4cfaef33a2..19a25ed323a5 100644
--- a/include/trace/events/mlxsw.h
+++ b/include/trace/events/mlxsw.h
@@ -93,7 +93,7 @@ TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate_end,
__entry->mlxsw_sp, __entry->vregion)
);
-TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis,
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_rollback_failed,
TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_acl_tcam_vregion *vregion),
diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h
new file mode 100644
index 000000000000..9849956f34d8
--- /dev/null
+++ b/include/trace/events/nbd.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nbd
+
+#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NBD_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(nbd_transport_event,
+
+ TP_PROTO(struct request *req, u64 handle),
+
+ TP_ARGS(req, handle),
+
+ TP_STRUCT__entry(
+ __field(struct request *, req)
+ __field(u64, handle)
+ ),
+
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->handle = handle;
+ ),
+
+ TP_printk(
+ "nbd transport event: request %p, handle 0x%016llx",
+ __entry->req,
+ __entry->handle
+ )
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_header_sent,
+
+ TP_PROTO(struct request *req, u64 handle),
+
+ TP_ARGS(req, handle)
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_payload_sent,
+
+ TP_PROTO(struct request *req, u64 handle),
+
+ TP_ARGS(req, handle)
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_header_received,
+
+ TP_PROTO(struct request *req, u64 handle),
+
+ TP_ARGS(req, handle)
+);
+
+DEFINE_EVENT(nbd_transport_event, nbd_payload_received,
+
+ TP_PROTO(struct request *req, u64 handle),
+
+ TP_ARGS(req, handle)
+);
+
+DECLARE_EVENT_CLASS(nbd_send_request,
+
+ TP_PROTO(struct nbd_request *nbd_request, int index,
+ struct request *rq),
+
+ TP_ARGS(nbd_request, index, rq),
+
+ TP_STRUCT__entry(
+ __field(struct nbd_request *, nbd_request)
+ __field(u64, dev_index)
+ __field(struct request *, request)
+ ),
+
+ TP_fast_assign(
+ __entry->nbd_request = 0;
+ __entry->dev_index = index;
+ __entry->request = rq;
+ ),
+
+ TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request)
+);
+
+#ifdef DEFINE_EVENT_WRITABLE
+#undef NBD_DEFINE_EVENT
+#define NBD_DEFINE_EVENT(template, call, proto, args, size) \
+ DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \
+ PARAMS(args), size)
+#else
+#undef NBD_DEFINE_EVENT
+#define NBD_DEFINE_EVENT(template, call, proto, args, size) \
+ DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
+#endif
+
+NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request,
+
+ TP_PROTO(struct nbd_request *nbd_request, int index,
+ struct request *rq),
+
+ TP_ARGS(nbd_request, index, rq),
+
+ sizeof(struct nbd_request)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 1efd7d9b25fe..2399073c3afc 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -95,6 +95,29 @@ TRACE_EVENT(net_dev_xmit,
__get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
);
+TRACE_EVENT(net_dev_xmit_timeout,
+
+ TP_PROTO(struct net_device *dev,
+ int queue_index),
+
+ TP_ARGS(dev, queue_index),
+
+ TP_STRUCT__entry(
+ __string( name, dev->name )
+ __string( driver, netdev_drivername(dev))
+ __field( int, queue_index )
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, dev->name);
+ __assign_str(driver, netdev_drivername(dev));
+ __entry->queue_index = queue_index;
+ ),
+
+ TP_printk("dev=%s driver=%s queue=%d",
+ __get_str(name), __get_str(driver), __entry->queue_index)
+);
+
DECLARE_EVENT_CLASS(net_dev_template,
TP_PROTO(struct sk_buff *skb),
diff --git a/include/uapi/asm-generic/sockios.h b/include/uapi/asm-generic/sockios.h
index 64f658c7cec2..44fa3ed70483 100644
--- a/include/uapi/asm-generic/sockios.h
+++ b/include/uapi/asm-generic/sockios.h
@@ -8,7 +8,7 @@
#define FIOGETOWN 0x8903
#define SIOCGPGRP 0x8904
#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* __ASM_GENERIC_SOCKIOS_H */
diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index c99336f4eefe..4ebc2135e950 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -2,18 +2,6 @@
/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _UAPI_LINUX_BATADV_PACKET_H_
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 305bf316dd03..67f4636758af 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -2,24 +2,6 @@
/* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
*/
#ifndef _UAPI_LINUX_BATMAN_ADV_H_
@@ -491,6 +473,13 @@ enum batadv_nl_attrs {
*/
BATADV_ATTR_THROUGHPUT_OVERRIDE,
+ /**
+ * @BATADV_ATTR_MULTICAST_FANOUT: defines the maximum number of packet
+ * copies that may be generated for a multicast-to-unicast conversion.
+	 * Once this limit is exceeded, distribution will fall back to broadcast.
+ */
+ BATADV_ATTR_MULTICAST_FANOUT,
+
/* add attributes above here, update the policy in netlink.c */
/**
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 929c8e537a14..72336bac7573 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -105,6 +105,7 @@ enum bpf_cmd {
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+ BPF_MAP_FREEZE,
};
enum bpf_map_type {
@@ -132,6 +133,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
+ BPF_MAP_TYPE_SK_STORAGE,
};
/* Note that tracing related programs such as
@@ -166,6 +168,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_CGROUP_SYSCTL,
+ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
};
enum bpf_attach_type {
@@ -187,6 +191,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
+ BPF_CGROUP_SYSCTL,
__MAX_BPF_ATTACH_TYPE
};
@@ -255,8 +260,19 @@ enum bpf_attach_type {
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd map fd
+ * insn[1].imm: 0 offset into value
+ * insn[0].off: 0 0
+ * insn[1].off: 0 0
+ * ldimm64 rewrite: address of map address of map[0]+offset
+ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_FD 1
+#define BPF_PSEUDO_MAP_VALUE 2
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
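
As a quick illustration of the encoding described above, here is a minimal user-space sketch that emits the two-instruction ldimm64 pair for BPF_PSEUDO_MAP_VALUE; map_fd and value_off are placeholders, and the headers are assumed to already carry the defines added in this hunk.

#include <linux/bpf.h>

/* Emit "r1 = address of map value + value_off" as a two-insn ldimm64. */
static void emit_ld_map_value(struct bpf_insn insn[2], int map_fd,
                              __u32 value_off)
{
        insn[0] = (struct bpf_insn) {
                .code    = BPF_LD | BPF_DW | BPF_IMM,
                .dst_reg = BPF_REG_1,
                .src_reg = BPF_PSEUDO_MAP_VALUE,
                .imm     = map_fd,       /* insn[0].imm: map fd */
        };
        insn[1] = (struct bpf_insn) {
                .imm     = value_off,    /* insn[1].imm: offset into the value */
        };
}

The verifier then rewrites the pair into the address of map[0] plus the offset, as the table above notes.
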
@@ -283,7 +299,7 @@ enum bpf_attach_type {
#define BPF_OBJ_NAME_LEN 16U
-/* Flags for accessing BPF object */
+/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
@@ -293,6 +309,10 @@ enum bpf_attach_type {
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG (1U << 7)
+#define BPF_F_WRONLY_PROG (1U << 8)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
@@ -396,6 +416,13 @@ union bpf_attr {
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
+ __u32 ctx_size_in; /* input: len of ctx_in */
+ __u32 ctx_size_out; /* input/output: len of ctx_out
+ * returns ENOSPC if ctx_out
+ * is too small.
+ */
+ __aligned_u64 ctx_in;
+ __aligned_u64 ctx_out;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -1478,13 +1505,31 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
*
- * There is a single supported mode at this time:
+ * There are two supported modes at this time:
+ *
+ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ * (room space is added or removed below the layer 2 header).
*
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
* (room space is added or removed below the layer 3 header).
*
- * All values for *flags* are reserved for future usage, and must
- * be left at zero.
+ * The following flags are supported at this time:
+ *
+ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ * Adjusting mss in this way is not allowed for datagrams.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; **len** is the length of the inner MAC header.
*
	 * A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
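
A hedged sketch of how a tc BPF program might combine the new MAC-level mode with the encap flags defined further down in this file; the GRE choice and the sizes are illustrative, and the raw BPF_FUNC_* declaration is an assumption about how the object is built.

#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>

static int (*bpf_skb_adjust_room)(void *skb, __s32 len_diff, __u32 mode,
                                  __u64 flags) = (void *) BPF_FUNC_skb_adjust_room;

int encap_gre(struct __sk_buff *skb)
{
        __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO |
                      BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
                      BPF_F_ADJ_ROOM_ENCAP_L4_GRE;

        /* reserve room below the MAC header for an outer iphdr + 4-byte GRE */
        if (bpf_skb_adjust_room(skb, sizeof(struct iphdr) + 4,
                                BPF_ADJ_ROOM_MAC, flags))
                return TC_ACT_SHOT;

        /* ... the outer headers would then be written with bpf_skb_store_bytes() ... */
        return TC_ACT_OK;
}
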
@@ -1694,12 +1739,19 @@ union bpf_attr {
* error if an eBPF program tries to set a callback that is not
* supported in the current kernel.
*
- * The supported callback values that *argval* can combine are:
+ *	*argval* is a bit mask that can combine these flags:
*
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
*
+ * Therefore, this function can be used to clear a callback flag by
+ * setting the appropriate bit to zero. e.g. to disable the RTO
+ * callback:
+ *
+ * **bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
* Here are some examples of where one could call such eBPF
* program:
*
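
A hedged sketch of a BPF_PROG_TYPE_SOCK_OPS program selecting callbacks with this helper; the raw BPF_FUNC_* declaration is an assumption about the build environment.

#include <linux/bpf.h>

static int (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *ops, int argval) =
        (void *) BPF_FUNC_sock_ops_cb_flags_set;

int sockops_prog(struct bpf_sock_ops *ops)
{
        /* once the connection is established, ask only for RTO and
         * TCP state-change callbacks
         */
        if (ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
            ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
                bpf_sock_ops_cb_flags_set(ops, BPF_SOCK_OPS_RTO_CB_FLAG |
                                               BPF_SOCK_OPS_STATE_CB_FLAG);
        return 1;
}
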
@@ -2431,6 +2483,190 @@ union bpf_attr {
* Return
* A **struct bpf_sock** pointer on success, or **NULL** in
* case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * This function is identical to bpf_sk_lookup_tcp, except that it
+ * also returns timewait or request sockets. Use bpf_sk_fullsock
+ *		or bpf_tcp_sock to access the full structure.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Check whether iph and th contain a valid SYN cookie ACK for
+ * the listening socket in sk.
+ *
+ * iph points to the start of the IPv4 or IPv6 header, while
+ *		iph_len contains sizeof(struct iphdr) or sizeof(struct ipv6hdr).
+ *
+ * th points to the start of the TCP header, while th_len contains
+ * sizeof(struct tcphdr).
+ *
+ * Return
+ * 0 if iph and th are a valid SYN cookie ACK, or a negative error
+ * otherwise.
+ *
+ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * Description
+ *		Get the name of the sysctl in /proc/sys/ and copy it into the
+ *		buffer *buf* of size *buf_len* provided by the program.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ * only (e.g. "tcp_mem").
+ * Return
+ *		Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get current value of sysctl as it is presented in /proc/sys
+ *		(incl. newline, etc.), and copy it as a string into the
+ *		buffer *buf* of size *buf_len* provided by the program.
+ *
+ *		The whole value is copied, regardless of the file position at
+ *		which user space issued e.g. sys_read.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ *		Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ *		truncated value in this case).
+ *
+ * **-EINVAL** if current value was unavailable, e.g. because
+ * sysctl is uninitialized and read returns -EIO for it.
+ *
+ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ *		Get the new value being written by user space to the sysctl
+ *		(before the actual write happens) and copy it as a string into
+ *		the buffer *buf* of size *buf_len* provided by the program.
+ *
+ *		User space may write the new value at a file position > 0.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ *		Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ *		truncated value in this case).
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * Description
+ * Override new value being written by user space to sysctl with
+ * value provided by program in buffer *buf* of size *buf_len*.
+ *
+ *		*buf* should contain a string in the same form as provided by user
+ * space on sysctl write.
+ *
+ *		User space may write the new value at a file position > 0. To
+ *		override the whole sysctl value, the file position should be set
+ *		to zero.
+ * Return
+ * 0 on success.
+ *
+ * **-E2BIG** if the *buf_len* is too big.
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to a long integer according to the given base
+ * and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by isspace(3)) followed by a single optional '-'
+ * sign.
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space strtol(3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than buf_len.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to an unsigned long integer according to the
+ * given base and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by isspace(3)).
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space strtoul(3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than buf_len.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * Description
+ * Get a bpf-local-storage from a sk.
+ *
+ * Logically, it could be thought of getting the value from
+ *		Logically, it could be thought of as getting the value from
+ * perspective, the usage is not much different from
+ *		**bpf_map_lookup_elem(map, &sk)**, except that this
+ *		helper enforces that the key must be a full socket
+ *		(e.g. one returned by **bpf_sk_fullsock**\ ()) and the map
+ *		must be of type **BPF_MAP_TYPE_SK_STORAGE**.
+ *
+ * Underneath, the value is stored locally at *sk* instead of
+ * the map. The *map* is used as the bpf-local-storage **type**.
+ * The bpf-local-storage **type** (i.e. the *map*) is searched
+ * against all bpf-local-storages residing at sk.
+ *
+ * An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be
+ * used such that a new bpf-local-storage will be
+ * created if one does not exist. *value* can be used
+ * together with BPF_SK_STORAGE_GET_F_CREATE to specify
+ * the initial value of a bpf-local-storage. If *value* is
+ * NULL, the new bpf-local-storage will be zero initialized.
+ * Return
+ * A bpf-local-storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf-local-storage.
+ *
+ * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * Description
+ * Delete a bpf-local-storage from a sk.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf-local-storage cannot be found.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2531,7 +2767,17 @@ union bpf_attr {
FN(sk_fullsock), \
FN(tcp_sock), \
FN(skb_ecn_set_ce), \
- FN(get_listener_sock),
+ FN(get_listener_sock), \
+ FN(skc_lookup_tcp), \
+ FN(tcp_check_syncookie), \
+ FN(sysctl_get_name), \
+ FN(sysctl_get_current_value), \
+ FN(sysctl_get_new_value), \
+ FN(sysctl_set_new_value), \
+ FN(strtol), \
+ FN(strtoul), \
+ FN(sk_storage_get), \
+ FN(sk_storage_delete),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
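
A hedged sketch of the sk_storage helper pair in use; SEC, __uint, __type and the helper prototypes come from libbpf's bpf_helpers.h and are assumptions about the build environment rather than part of this header.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct pkt_count {
        __u64 pkts;
};

struct {
        __uint(type, BPF_MAP_TYPE_SK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, struct pkt_count);
} sk_pkts SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
        struct bpf_sock *sk = skb->sk;
        struct pkt_count *cnt;

        if (!sk)
                return 1;
        sk = bpf_sk_fullsock(sk);       /* the key must be a full socket */
        if (!sk)
                return 1;

        cnt = bpf_sk_storage_get(&sk_pkts, sk, NULL,
                                 BPF_SK_STORAGE_GET_F_CREATE);
        if (cnt)
                __sync_fetch_and_add(&cnt->pkts, 1);
        return 1;
}

Calling bpf_sk_storage_delete(&sk_pkts, sk) would drop the per-socket entry again.
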
@@ -2590,9 +2836,30 @@ enum bpf_func_id {
/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
+/* BPF_FUNC_sysctl_get_name flags. */
+#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
+
+/* BPF_FUNC_sk_storage_get flags */
+#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
+ BPF_ADJ_ROOM_MAC,
};
/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
@@ -3218,4 +3485,14 @@ struct bpf_line_info {
struct bpf_spin_lock {
__u32 val;
};
+
+struct bpf_sysctl {
+ __u32 write; /* Sysctl is being read (= 0) or written (= 1).
+ * Allows 1,2,4-byte read, but no write.
+ */
+ __u32 file_pos; /* Sysctl file position to read from, write to.
+				 * Allows 1,2,4-byte read and 4-byte write.
+ */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
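
A hedged sketch of a BPF_PROG_TYPE_CGROUP_SYSCTL program built around the struct bpf_sysctl context above; the raw BPF_FUNC_* declarations and the 1000 cap are illustrative assumptions, not part of this header.

#include <linux/bpf.h>

static int (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf,
                                  unsigned long buf_len, __u64 flags) =
        (void *) BPF_FUNC_sysctl_get_name;
static int (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf,
                                       unsigned long buf_len) =
        (void *) BPF_FUNC_sysctl_get_new_value;
static int (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags,
                          unsigned long *res) = (void *) BPF_FUNC_strtoul;

/* return 1 to allow the access, 0 to reject it with -EPERM */
int sysctl_prog(struct bpf_sysctl *ctx)
{
        char name[32] = {};
        char val[16] = {};
        unsigned long newval;

        /* e.g. "somaxconn" when BPF_F_SYSCTL_BASE_NAME is set */
        bpf_sysctl_get_name(ctx, name, sizeof(name), BPF_F_SYSCTL_BASE_NAME);

        if (!ctx->write)                /* reads are always allowed here */
                return 1;

        if (bpf_sysctl_get_new_value(ctx, val, sizeof(val)) <= 0)
                return 0;
        if (bpf_strtoul(val, sizeof(val), 0, &newval) <= 0)
                return 0;

        return newval <= 1000;          /* illustrative policy on the new value */
}
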
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 7b7475ef2f17..9310652ca4f9 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -39,11 +39,11 @@ struct btf_type {
* struct, union and fwd
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT and UNION.
+ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- * FUNC and FUNC_PROTO.
+ * FUNC, FUNC_PROTO and VAR.
* "type" is a type_id referring to another type.
*/
union {
@@ -70,8 +70,10 @@ struct btf_type {
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
-#define BTF_KIND_MAX 13
-#define NR_BTF_KINDS 14
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
@@ -138,4 +140,26 @@ struct btf_param {
__u32 type;
};
+enum {
+ BTF_VAR_STATIC = 0,
+ BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+ __u32 linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains along with its
+ * in-section offset as well as size.
+ */
+struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index d473e5ed044c..3534ce157ae9 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -252,9 +252,17 @@ struct ethtool_tunable {
#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff
#define DOWNSHIFT_DEV_DISABLE 0
+/* Time in msecs after which link is reported as down
+ * 0 = lowest time supported by the PHY
+ * 0xff = off, link down detection according to standard
+ */
+#define ETHTOOL_PHY_FAST_LINK_DOWN_ON 0
+#define ETHTOOL_PHY_FAST_LINK_DOWN_OFF 0xff
+
enum phy_tunable_id {
ETHTOOL_PHY_ID_UNSPEC,
ETHTOOL_PHY_DOWNSHIFT,
+ ETHTOOL_PHY_FAST_LINK_DOWN,
/*
* Add your fresh new phy tunable attribute above and remember to update
* phy_tunable_strings[] in net/core/ethtool.c
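
For reference, a hedged user-space sketch of driving the new tunable through the existing ETHTOOL_PHY_STUNABLE ioctl; everything except the two defines above comes from the established ethtool UAPI, and fd is assumed to be any AF_INET socket.

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int set_fast_link_down(int fd, const char *ifname)
{
        struct {
                struct ethtool_tunable hdr;
                __u8 msecs;
        } tuna = {
                .hdr = {
                        .cmd     = ETHTOOL_PHY_STUNABLE,
                        .id      = ETHTOOL_PHY_FAST_LINK_DOWN,
                        .type_id = ETHTOOL_TUNABLE_U8,
                        .len     = sizeof(__u8),
                },
                /* 0: report link down after the lowest time the PHY supports */
                .msecs = ETHTOOL_PHY_FAST_LINK_DOWN_ON,
        };
        struct ifreq ifr = {};

        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (char *)&tuna;
        return ioctl(fd, SIOCETHTOOL, &ifr);
}
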
@@ -1704,6 +1712,9 @@ static inline int ethtool_validate_duplex(__u8 duplex)
#define ETH_MODULE_SFF_8436 0x4
#define ETH_MODULE_SFF_8436_LEN 256
+#define ETH_MODULE_SFF_8636_MAX_LEN 640
+#define ETH_MODULE_SFF_8436_MAX_LEN 640
+
/* Reset flags */
/* The reset() operation must clear the flags for the components which
* were actually reset. On successful return, the flags indicate the
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index f2ea833a2812..87c2c9f08803 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -16,6 +16,12 @@ enum {
FOU_ATTR_IPPROTO, /* u8 */
FOU_ATTR_TYPE, /* u8 */
FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */
+ FOU_ATTR_LOCAL_V4, /* u32 */
+ FOU_ATTR_LOCAL_V6, /* in6_addr */
+ FOU_ATTR_PEER_V4, /* u32 */
+ FOU_ATTR_PEER_V6, /* in6_addr */
+ FOU_ATTR_PEER_PORT, /* u16 */
+ FOU_ATTR_IFINDEX, /* s32 */
__FOU_ATTR_MAX,
};
diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
index 325395f56bfa..2622b5a3e616 100644
--- a/include/uapi/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
@@ -90,6 +90,8 @@ struct icmp6hdr {
#define ICMPV6_TIME_EXCEED 3
#define ICMPV6_PARAMPROB 4
+#define ICMPV6_ERRMSG_MAX 127
+
#define ICMPV6_INFOMSG_MASK 0x80
#define ICMPV6_ECHO_REQUEST 128
@@ -110,6 +112,8 @@ struct icmp6hdr {
#define ICMPV6_MRDISC_ADV 151
+#define ICMPV6_MSG_MAX 255
+
/*
* Codes for Destination Unreachable
*/
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 3a45b4ad71a3..3158ba672b72 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -109,6 +109,7 @@
#define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */
#define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 23a6753b37df..454ae31b93c7 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -60,6 +60,7 @@
#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
#define TUNSETFILTEREBPF _IOR('T', 225, int)
#define TUNSETCARRIER _IOW('T', 226, int)
+#define TUNGETDEVNETNS _IO('T', 227)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
diff --git a/include/uapi/linux/if_vlan.h b/include/uapi/linux/if_vlan.h
index 7a0e8bd65b6b..90a2c89afc8f 100644
--- a/include/uapi/linux/if_vlan.h
+++ b/include/uapi/linux/if_vlan.h
@@ -32,10 +32,11 @@ enum vlan_ioctl_cmds {
};
enum vlan_flags {
- VLAN_FLAG_REORDER_HDR = 0x1,
- VLAN_FLAG_GVRP = 0x2,
- VLAN_FLAG_LOOSE_BINDING = 0x4,
- VLAN_FLAG_MVRP = 0x8,
+ VLAN_FLAG_REORDER_HDR = 0x1,
+ VLAN_FLAG_GVRP = 0x2,
+ VLAN_FLAG_LOOSE_BINDING = 0x4,
+ VLAN_FLAG_MVRP = 0x8,
+ VLAN_FLAG_BRIDGE_BINDING = 0x10,
};
enum vlan_name_types {
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index 1c916b2f89dc..e34f436fc79d 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -124,6 +124,13 @@
#define IP_VS_PEDATA_MAXLEN 255
+/* Tunnel types */
+enum {
+ IP_VS_CONN_F_TUNNEL_TYPE_IPIP = 0, /* IPIP */
+ IP_VS_CONN_F_TUNNEL_TYPE_GUE, /* GUE */
+ IP_VS_CONN_F_TUNNEL_TYPE_MAX,
+};
+
/*
* The struct ip_vs_service_user and struct ip_vs_dest_user are
* used to set IPVS rules through setsockopt.
@@ -392,6 +399,10 @@ enum {
IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */
+ IPVS_DEST_ATTR_TUN_TYPE, /* tunnel type */
+
+ IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */
+
__IPVS_DEST_ATTR_MAX,
};
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index a66c8de006cc..f0cf7b0f4f35 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -967,6 +967,7 @@ enum nft_socket_keys {
* @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address)
* @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address)
* @NFT_CT_TIMEOUT: connection tracking timeout policy assigned to conntrack
+ * @NFT_CT_ID: conntrack id
*/
enum nft_ct_keys {
NFT_CT_STATE,
@@ -993,6 +994,7 @@ enum nft_ct_keys {
NFT_CT_SRC_IP6,
NFT_CT_DST_IP6,
NFT_CT_TIMEOUT,
+ NFT_CT_ID,
__NFT_CT_MAX
};
#define NFT_CT_MAX (__NFT_CT_MAX - 1)
@@ -1522,15 +1524,21 @@ enum nft_flowtable_hook_attributes {
*
* @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
* @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
+ * @NFTA_OSF_FLAGS: flags (NLA_U32)
*/
enum nft_osf_attributes {
NFTA_OSF_UNSPEC,
NFTA_OSF_DREG,
NFTA_OSF_TTL,
+ NFTA_OSF_FLAGS,
__NFTA_OSF_MAX,
};
#define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1)
+enum nft_osf_flags {
+ NFT_OSF_F_VERSION = (1 << 0),
+};
+
/**
* enum nft_device_attributes - nf_tables device netlink attributes
*
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index dd4f86ee286e..6f09d1500960 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -11,7 +11,7 @@
* Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
* Copyright 2008 Colin McCabe <colin@cozybit.com>
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -1065,6 +1065,26 @@
* indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes
* determining the width and type.
*
+ * @NL80211_CMD_UPDATE_OWE_INFO: This interface allows the host driver to
+ * offload OWE processing to user space. This intends to support
+ * OWE AKM by the host drivers that implement SME but rely
+ * on the user space for the cryptographic/DH IE processing in AP mode.
+ *
+ * @NL80211_CMD_PROBE_MESH_LINK: The requirement for mesh link metric
+ * refreshing, is that from one mesh point we be able to send some data
+ *	refreshing is that from one mesh point we be able to send some data
+ * primary traffic path, but which are only 1 hop away. The absence of
+ * the primary path to the chosen node makes it necessary to apply some
+ * form of marking on a chosen packet stream so that the packets can be
+ * properly steered to the selected node for testing, and not by the
+ * regular mesh path lookup. Further, the packets must be of type data
+ * so that the rate control (often embedded in firmware) is used for
+ * rate selection.
+ *
+ * Here attribute %NL80211_ATTR_MAC is used to specify connected mesh
+ * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame
+ * content. The frame is ethernet data.
+ *
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
*/
@@ -1285,6 +1305,10 @@ enum nl80211_commands {
NL80211_CMD_NOTIFY_RADAR,
+ NL80211_CMD_UPDATE_OWE_INFO,
+
+ NL80211_CMD_PROBE_MESH_LINK,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
@@ -2308,6 +2332,15 @@ enum nl80211_commands {
* @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime
* scheduler.
*
+ * @NL80211_ATTR_STA_TX_POWER_SETTING: Transmit power setting type (u8) for
+ * station associated with the AP. See &enum nl80211_tx_power_setting for
+ * possible values.
+ * @NL80211_ATTR_STA_TX_POWER: Transmit power level (s16) in dBm units. This
+ *	allows setting Tx power for a station. If this attribute is not included,
+ *	the default per-interface tx power setting applies. The driver should
+ *	pick the lowest tx power, either the per-interface or the per-station
+ *	setting.
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2758,6 +2791,8 @@ enum nl80211_attrs {
NL80211_ATTR_PEER_MEASUREMENTS,
NL80211_ATTR_AIRTIME_WEIGHT,
+ NL80211_ATTR_STA_TX_POWER_SETTING,
+ NL80211_ATTR_STA_TX_POWER,
/* add attributes here, update the policy in nl80211.c */
@@ -2802,7 +2837,7 @@ enum nl80211_attrs {
#define NL80211_MAX_SUPP_RATES 32
#define NL80211_MAX_SUPP_HT_RATES 77
-#define NL80211_MAX_SUPP_REG_RULES 64
+#define NL80211_MAX_SUPP_REG_RULES 128
#define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0
#define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16
#define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24
@@ -3139,6 +3174,7 @@ enum nl80211_sta_bss_param {
* @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames
* sent to the station (u64, usec)
* @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16)
+ * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station
* @__NL80211_STA_INFO_AFTER_LAST: internal
* @NL80211_STA_INFO_MAX: highest possible station info attribute
*/
@@ -3184,6 +3220,7 @@ enum nl80211_sta_info {
NL80211_STA_INFO_CONNECTED_TO_GATE,
NL80211_STA_INFO_TX_DURATION,
NL80211_STA_INFO_AIRTIME_WEIGHT,
+ NL80211_STA_INFO_AIRTIME_LINK_METRIC,
/* keep last */
__NL80211_STA_INFO_AFTER_LAST,
@@ -3638,6 +3675,14 @@ enum nl80211_reg_rule_attr {
* value as specified by &struct nl80211_bss_select_rssi_adjust.
* @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching
* (this cannot be used together with SSID).
+ * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the
+ * band specific minimum rssi thresholds for the bands defined in
+ *	enum nl80211_band. The minimum rssi threshold value (s32) specific to a
+ *	band shall be encapsulated in an attribute whose type equals one of the
+ *	NL80211_BAND_* values defined in enum nl80211_band. For example, the
+ *	minimum rssi threshold value for the 2.4 GHz band shall be encapsulated
+ *	within an attribute of type NL80211_BAND_2GHZ, and one or more such
+ *	attributes will be nested within this attribute.
* @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter
* attribute number currently defined
* @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use
@@ -3650,6 +3695,7 @@ enum nl80211_sched_scan_match_attr {
NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI,
NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST,
NL80211_SCHED_SCAN_MATCH_ATTR_BSSID,
+ NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI,
/* keep last */
__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST,
@@ -4135,6 +4181,27 @@ enum nl80211_channel_type {
};
/**
+ * enum nl80211_key_mode - Key mode
+ *
+ * @NL80211_KEY_RX_TX: (Default)
+ * Key can be used for Rx and Tx immediately
+ *
+ * The following modes can only be selected for unicast keys and when the
+ * driver supports @NL80211_EXT_FEATURE_EXT_KEY_ID:
+ *
+ * @NL80211_KEY_NO_TX: Only allowed in combination with @NL80211_CMD_NEW_KEY:
+ * Unicast key can only be used for Rx, Tx not allowed, yet
+ * @NL80211_KEY_SET_TX: Only allowed in combination with @NL80211_CMD_SET_KEY:
+ * The unicast key identified by idx and mac is cleared for Tx and becomes
+ * the preferred Tx key for the station.
+ */
+enum nl80211_key_mode {
+ NL80211_KEY_RX_TX,
+ NL80211_KEY_NO_TX,
+ NL80211_KEY_SET_TX
+};
+
+/**
* enum nl80211_chan_width - channel width definitions
*
* These values are used with the %NL80211_ATTR_CHANNEL_WIDTH
@@ -4377,6 +4444,9 @@ enum nl80211_key_default_types {
* @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags
* attributes, specifying what a key should be set as default as.
* See &enum nl80211_key_default_types.
+ * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode.
+ * Defaults to @NL80211_KEY_RX_TX.
+ *
* @__NL80211_KEY_AFTER_LAST: internal
* @NL80211_KEY_MAX: highest key attribute
*/
@@ -4390,6 +4460,7 @@ enum nl80211_key_attributes {
NL80211_KEY_DEFAULT_MGMT,
NL80211_KEY_TYPE,
NL80211_KEY_DEFAULT_TYPES,
+ NL80211_KEY_MODE,
/* keep last */
__NL80211_KEY_AFTER_LAST,
@@ -5335,6 +5406,8 @@ enum nl80211_feature_flags {
* able to rekey an in-use key correctly. Userspace must not rekey PTK keys
* if this flag is not set. Ignoring this can leak clear text packets and/or
* freeze the connection.
+ * @NL80211_EXT_FEATURE_EXT_KEY_ID: Driver supports "Extended Key ID for
+ * Individually Addressed Frames" from IEEE802.11-2016.
*
* @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime
* fairness for transmitted packets and has enabled airtime fairness
@@ -5343,6 +5416,12 @@ enum nl80211_feature_flags {
* @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching
* (set/del PMKSA operations) in AP mode.
*
+ * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports
+ * filtering of sched scan results using band specific RSSI thresholds.
+ *
+ * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power
+ * to a station.
+ *
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
*/
@@ -5384,6 +5463,9 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER,
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS,
NL80211_EXT_FEATURE_AP_PMKSA_CACHING,
+ NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD,
+ NL80211_EXT_FEATURE_EXT_KEY_ID,
+ NL80211_EXT_FEATURE_STA_TX_PWR,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index dbe0cbe4f1b7..f271f1ec50ae 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -364,6 +364,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */
OVS_TUNNEL_KEY_ATTR_PAD,
OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */
+ OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE, /* No argument. IPV4_INFO_BRIDGE mode.*/
__OVS_TUNNEL_KEY_ATTR_MAX
};
@@ -734,6 +735,7 @@ struct ovs_action_hash {
* be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups,
* respectively. Remaining bits control the changes for which an event is
* delivered on the NFNLGRP_CONNTRACK_UPDATE group.
+ * @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout.
*/
enum ovs_ct_attr {
OVS_CT_ATTR_UNSPEC,
@@ -746,6 +748,8 @@ enum ovs_ct_attr {
OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */
OVS_CT_ATTR_FORCE_COMMIT, /* No argument */
OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */
+ OVS_CT_ATTR_TIMEOUT, /* Associate timeout with this connection for
+				 * fine-grained timeout tuning. */
__OVS_CT_ATTR_MAX
};
@@ -798,6 +802,44 @@ struct ovs_action_push_eth {
struct ovs_key_ethernet addresses;
};
+/*
+ * enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN.
+ *
+ * @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for.
+ * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_*
+ * actions to apply if the packet length is greater than the specified
+ * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.
+ * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: Nested OVS_ACTION_ATTR_*
+ * actions to apply if the packet length is less than or equal to the specified
+ * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.
+ */
+enum ovs_check_pkt_len_attr {
+ OVS_CHECK_PKT_LEN_ATTR_UNSPEC,
+ OVS_CHECK_PKT_LEN_ATTR_PKT_LEN,
+ OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER,
+ OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL,
+ __OVS_CHECK_PKT_LEN_ATTR_MAX,
+
+#ifdef __KERNEL__
+ OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */
+#endif
+};
+
+#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1)
+
+#ifdef __KERNEL__
+struct check_pkt_len_arg {
+	u16 pkt_len;		/* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. */
+ bool exec_for_greater; /* When true, actions in IF_GREATER will
+ * not change flow keys. False otherwise.
+ */
+ bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL
+ * will not change flow keys. False
+ * otherwise.
+ */
+};
+#endif
+
/**
* enum ovs_action_attr - Action types.
*
@@ -842,6 +884,9 @@ struct ovs_action_push_eth {
* packet, or modify the packet (e.g., change the DSCP field).
* @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
* actions without affecting the original packet and key.
+ * @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set
+ * of actions if greater than the specified packet length, else execute
+ * another set of actions.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -876,6 +921,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_POP_NSH, /* No argument. */
OVS_ACTION_ATTR_METER, /* u32 meter ID. */
OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
+ OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 7ee74c3474bf..8b2f993cbb77 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1148,6 +1148,16 @@ enum {
#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1)
+/* The format for the admin sched (dump only):
+ * [TCA_TAPRIO_SCHED_ADMIN_SCHED]
+ * [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_CMD]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_GATES]
+ * [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
+ */
+
enum {
TCA_TAPRIO_ATTR_UNSPEC,
TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
@@ -1156,6 +1166,9 @@ enum {
TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */
TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */
TCA_TAPRIO_PAD,
+ TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
+ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
+ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
__TCA_TAPRIO_ATTR_MAX,
};
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index d393e9ed3964..7d1bccbbef78 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -19,6 +19,7 @@
#ifndef _LINUX_SOCKIOS_H
#define _LINUX_SOCKIOS_H
+#include <asm/bitsperlong.h>
#include <asm/sockios.h>
/* Linux-specific socket ioctls */
@@ -27,6 +28,26 @@
#define SOCK_IOC_TYPE 0x89
+/*
+ * the timeval/timespec data structure layout is defined by libc,
+ * so we need to cover both possible versions on 32-bit.
+ */
+/* Get stamp (timeval) */
+#define SIOCGSTAMP_NEW _IOR(SOCK_IOC_TYPE, 0x06, long long[2])
+/* Get stamp (timespec) */
+#define SIOCGSTAMPNS_NEW _IOR(SOCK_IOC_TYPE, 0x07, long long[2])
+
+#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+/* on 64-bit and x32, avoid the ?: operator */
+#define SIOCGSTAMP SIOCGSTAMP_OLD
+#define SIOCGSTAMPNS SIOCGSTAMPNS_OLD
+#else
+#define SIOCGSTAMP ((sizeof(struct timeval)) == 8 ? \
+ SIOCGSTAMP_OLD : SIOCGSTAMP_NEW)
+#define SIOCGSTAMPNS ((sizeof(struct timespec)) == 8 ? \
+ SIOCGSTAMPNS_OLD : SIOCGSTAMPNS_NEW)
+#endif
+
/* Routing table calls. */
#define SIOCADDRT 0x890B /* add routing table entry */
#define SIOCDELRT 0x890C /* delete routing table entry */
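
A hedged user-space sketch: the SIOCGSTAMP name now resolves to the _OLD or _NEW variant depending on the libc timeval layout, but the call site stays the same.

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <linux/sockios.h>

/* print the receive timestamp of the last packet passed to the user */
static void print_last_rx_stamp(int sock)
{
        struct timeval tv;

        if (ioctl(sock, SIOCGSTAMP, &tv) == 0)
                printf("last packet: %ld.%06ld\n",
                       (long)tv.tv_sec, (long)tv.tv_usec);
}
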
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 8bb6cc5f3235..b521464ea962 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -160,15 +160,42 @@ enum {
#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
+/*
+ * Sender's congestion state indicating normal or abnormal situations
+ * in the last round of packets sent. The state is driven by the ACK
+ * information and timer events.
+ */
enum tcp_ca_state {
+ /*
+ * Nothing bad has been observed recently.
+ * No apparent reordering, packet loss, or ECN marks.
+ */
TCP_CA_Open = 0,
#define TCPF_CA_Open (1<<TCP_CA_Open)
+ /*
+ * The sender enters disordered state when it has received DUPACKs or
+ * SACKs in the last round of packets sent. This could be due to packet
+ * loss or reordering but needs further information to confirm packets
+ * have been lost.
+ */
TCP_CA_Disorder = 1,
#define TCPF_CA_Disorder (1<<TCP_CA_Disorder)
+ /*
+ * The sender enters Congestion Window Reduction (CWR) state when it
+ * has received ACKs with ECN-ECE marks, or has experienced congestion
+ * or packet discard on the sender host (e.g. qdisc).
+ */
TCP_CA_CWR = 2,
#define TCPF_CA_CWR (1<<TCP_CA_CWR)
+ /*
+ * The sender is in fast recovery and retransmitting lost packets,
+ * typically triggered by ACK events.
+ */
TCP_CA_Recovery = 3,
#define TCPF_CA_Recovery (1<<TCP_CA_Recovery)
+ /*
+ * The sender is in loss recovery triggered by retransmission timeout.
+ */
TCP_CA_Loss = 4
#define TCPF_CA_Loss (1<<TCP_CA_Loss)
};
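
For reference, a hedged user-space sketch that reads the state documented above through TCP_INFO; tcpi_ca_state carries one of the TCP_CA_* values.

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void print_ca_state(int sock)
{
        struct tcp_info ti;
        socklen_t len = sizeof(ti);

        memset(&ti, 0, sizeof(ti));
        if (getsockopt(sock, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
                printf("ca_state=%u (0=Open 1=Disorder 2=CWR 3=Recovery 4=Loss)\n",
                       ti.tcpi_ca_state);
}
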
diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 6b2fd4d9655f..7df026ea6aff 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -190,6 +190,7 @@ struct sockaddr_tipc {
#define TIPC_MCAST_REPLICAST 134 /* Default: TIPC selects. No arg */
#define TIPC_GROUP_JOIN 135 /* Takes struct tipc_group_req* */
#define TIPC_GROUP_LEAVE 136 /* No argument */
+#define TIPC_SOCK_RECVQ_USED 137 /* Default: none (read only) */
/*
* Flag values
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 0ebe02ef1a86..efb958fd167d 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -281,6 +281,8 @@ enum {
TIPC_NLA_PROP_TOL, /* u32 */
TIPC_NLA_PROP_WIN, /* u32 */
TIPC_NLA_PROP_MTU, /* u32 */
+ TIPC_NLA_PROP_BROADCAST, /* u32 */
+ TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 401d6f01de6a..5b9c26753e46 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -70,6 +70,13 @@
#define TLS_CIPHER_AES_GCM_256_TAG_SIZE 16
#define TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE 8
+#define TLS_CIPHER_AES_CCM_128 53
+#define TLS_CIPHER_AES_CCM_128_IV_SIZE 8
+#define TLS_CIPHER_AES_CCM_128_KEY_SIZE 16
+#define TLS_CIPHER_AES_CCM_128_SALT_SIZE 4
+#define TLS_CIPHER_AES_CCM_128_TAG_SIZE 16
+#define TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE 8
+
#define TLS_SET_RECORD_TYPE 1
#define TLS_GET_RECORD_TYPE 2
@@ -94,4 +101,12 @@ struct tls12_crypto_info_aes_gcm_256 {
unsigned char rec_seq[TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE];
};
+struct tls12_crypto_info_aes_ccm_128 {
+ struct tls_crypto_info info;
+ unsigned char iv[TLS_CIPHER_AES_CCM_128_IV_SIZE];
+ unsigned char key[TLS_CIPHER_AES_CCM_128_KEY_SIZE];
+ unsigned char salt[TLS_CIPHER_AES_CCM_128_SALT_SIZE];
+ unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE];
+};
+
#endif /* _UAPI_LINUX_TLS_H */
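
A hedged sketch of enabling kTLS transmit with the new AES-128-CCM cipher; the all-zero key material is a placeholder (it must come from the TLS handshake), and the SOL_TLS/TCP_ULP fallback defines are assumptions about the toolchain headers.

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282
#endif
#ifndef TCP_ULP
#define TCP_ULP 31
#endif

static int enable_tls_ccm_tx(int sock)
{
        struct tls12_crypto_info_aes_ccm_128 ci;

        if (setsockopt(sock, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
                return -1;

        memset(&ci, 0, sizeof(ci));
        ci.info.version = TLS_1_2_VERSION;
        ci.info.cipher_type = TLS_CIPHER_AES_CCM_128;
        /* ci.iv, ci.key, ci.salt and ci.rec_seq are filled from the handshake */
        return setsockopt(sock, SOL_TLS, TLS_TX, &ci, sizeof(ci));
}
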