From 2d8ae8c417db284f598dffb178cc01e7db0f1821 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 2 May 2023 15:36:02 +0200 Subject: nfsd: use vfs setgid helper We've aligned setgid behavior over multiple kernel releases. The details can be found in commit cf619f891971 ("Merge tag 'fs.ovl.setgid.v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping") and commit 426b4ca2d6a5 ("Merge tag 'fs.setgid.v6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux"). Consistent setgid stripping behavior is now encapsulated in the setattr_should_drop_sgid() helper which is used by all filesystems that strip setgid bits outside of vfs proper. Usually ATTR_KILL_SGID is raised in e.g., chown_common() and is subject to the setattr_should_drop_sgid() check to determine whether the setgid bit can be retained. Since nfsd is raising ATTR_KILL_SGID unconditionally it will cause notify_change() to strip it even if the caller had the necessary privileges to retain it. Ensure that nfsd only raises ATR_KILL_SGID if the caller lacks the necessary privileges to retain the setgid bit. Without this patch the setgid stripping tests in LTP will fail: > As you can see, the problem is S_ISGID (0002000) was dropped on a > non-group-executable file while chown was invoked by super-user, while [...] > fchown02.c:66: TFAIL: testfile2: wrong mode permissions 0100700, expected 0102700 [...] > chown02.c:57: TFAIL: testfile2: wrong mode permissions 0100700, expected 0102700 With this patch all tests pass. Reported-by: Sherry Yang Signed-off-by: Christian Brauner Reviewed-by: Jeff Layton Cc: Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index db67f8e19344..0016bcc04a59 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -388,7 +388,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) iap->ia_mode &= ~S_ISGID; } else { /* set ATTR_KILL_* bits and let VFS handle it */ - iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); + iap->ia_valid |= ATTR_KILL_SUID; + iap->ia_valid |= + setattr_should_drop_sgid(&nop_mnt_idmap, inode); } } } -- cgit v1.2.3 From fc80fc2d4e39137869da3150ee169b40bf879287 Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Mon, 15 May 2023 10:13:07 +0800 Subject: SUNRPC: Fix UAF in svc_tcp_listen_data_ready() After the listener svc_sock is freed, and before invoking svc_tcp_accept() for the established child sock, there is a window that the newsock retaining a freed listener svc_sock in sk_user_data which cloning from parent. In the race window, if data is received on the newsock, we will observe use-after-free report in svc_tcp_listen_data_ready(). Reproduce by two tasks: 1. while :; do rpc.nfsd 0 ; rpc.nfsd; done 2. while :; do echo "" | ncat -4 127.0.0.1 2049 ; done KASAN report: ================================================================== BUG: KASAN: slab-use-after-free in svc_tcp_listen_data_ready+0x1cf/0x1f0 [sunrpc] Read of size 8 at addr ffff888139d96228 by task nc/102553 CPU: 7 PID: 102553 Comm: nc Not tainted 6.3.0+ #18 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Call Trace: dump_stack_lvl+0x33/0x50 print_address_description.constprop.0+0x27/0x310 print_report+0x3e/0x70 kasan_report+0xae/0xe0 svc_tcp_listen_data_ready+0x1cf/0x1f0 [sunrpc] tcp_data_queue+0x9f4/0x20e0 tcp_rcv_established+0x666/0x1f60 tcp_v4_do_rcv+0x51c/0x850 tcp_v4_rcv+0x23fc/0x2e80 ip_protocol_deliver_rcu+0x62/0x300 ip_local_deliver_finish+0x267/0x350 ip_local_deliver+0x18b/0x2d0 ip_rcv+0x2fb/0x370 __netif_receive_skb_one_core+0x166/0x1b0 process_backlog+0x24c/0x5e0 __napi_poll+0xa2/0x500 net_rx_action+0x854/0xc90 __do_softirq+0x1bb/0x5de do_softirq+0xcb/0x100 ... Allocated by task 102371: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x7b/0x90 svc_setup_socket+0x52/0x4f0 [sunrpc] svc_addsock+0x20d/0x400 [sunrpc] __write_ports_addfd+0x209/0x390 [nfsd] write_ports+0x239/0x2c0 [nfsd] nfsctl_transaction_write+0xac/0x110 [nfsd] vfs_write+0x1c3/0xae0 ksys_write+0xed/0x1c0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 102551: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x50 __kasan_slab_free+0x106/0x190 __kmem_cache_free+0x133/0x270 svc_xprt_free+0x1e2/0x350 [sunrpc] svc_xprt_destroy_all+0x25a/0x440 [sunrpc] nfsd_put+0x125/0x240 [nfsd] nfsd_svc+0x2cb/0x3c0 [nfsd] write_threads+0x1ac/0x2a0 [nfsd] nfsctl_transaction_write+0xac/0x110 [nfsd] vfs_write+0x1c3/0xae0 ksys_write+0xed/0x1c0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Fix the UAF by simply doing nothing in svc_tcp_listen_data_ready() if state != TCP_LISTEN, that will avoid dereferencing svsk for all child socket. Link: https://lore.kernel.org/lkml/20230507091131.23540-1-dinghui@sangfor.com.cn/ Fixes: fa9251afc33c ("SUNRPC: Call the default socket callbacks instead of open coding") Signed-off-by: Ding Hui Cc: Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f77cebe2c071..15f4d0d40bdd 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -826,12 +826,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) trace_sk_data_ready(sk); - if (svsk) { - /* Refer to svc_setup_socket() for details. */ - rmb(); - svsk->sk_odata(sk); - } - /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -840,13 +834,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk) * when one of child sockets become ESTABLISHED. * 2) data_ready method of the child socket may be called * when it receives data before the socket is accepted. - * In case of 2, we should ignore it silently. + * In case of 2, we should ignore it silently and DO NOT + * dereference svsk. */ - if (sk->sk_state == TCP_LISTEN) { - if (svsk) { - set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); - svc_xprt_enqueue(&svsk->sk_xprt); - } + if (sk->sk_state != TCP_LISTEN) + return; + + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); + svsk->sk_odata(sk); + set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); + svc_xprt_enqueue(&svsk->sk_xprt); } } -- cgit v1.2.3 From e8277327d74fb28bf46969ad68a225272f04c318 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:32:41 -0400 Subject: SUNRPC: Fix an incorrect comment The correct function name is svc_tcp_listen_data_ready(). Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 15f4d0d40bdd..a2d0bf429347 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1463,7 +1463,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, svsk->sk_owspace = inet->sk_write_space; /* * This barrier is necessary in order to prevent race condition - * with svc_data_ready(), svc_listen_data_ready() and others + * with svc_data_ready(), svc_tcp_listen_data_ready(), and others * when calling callbacks above. */ wmb(); -- cgit v1.2.3 From cce4ee9c7834f68cf6a221d61d614c2748611c1c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:32:47 -0400 Subject: SUNRPC: Remove dprintk() in svc_handle_xprt() When enabled, this dprintk() fires for every incoming RPC, which is an enormous amount of log traffic. These days, after the first few hundred log messages, the system journald is just going to mute it, along with all other NFSD debug output. Let's rely on trace points for this high-traffic information instead. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 13a14897bc17..42536a0b1db4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -843,9 +843,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) svc_xprt_received(xprt); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ - dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", - rqstp, rqstp->rq_pool->sp_id, xprt, - kref_read(&xprt->xpt_ref)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); -- cgit v1.2.3 From d7900daea0b9818cd1cbeb9c5bd94653c487b0e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:32:53 -0400 Subject: SUNRPC: Improve observability in svc_tcp_accept() The -ENOMEM arm could fire repeatedly if the system runs low on memory, so remove it. Don't bother to trace -EAGAIN error events, since those fire after a listener is created (with no work done) and once again after an accept has been handled successfully (again, with no work done). Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a2d0bf429347..fbe33c922882 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -886,13 +886,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); err = kernel_accept(sock, &newsock, O_NONBLOCK); if (err < 0) { - if (err == -ENOMEM) - printk(KERN_WARNING "%s: no more sockets!\n", - serv->sv_name); - else if (err != -EAGAIN) - net_warn_ratelimited("%s: accept failed (err %d)!\n", - serv->sv_name, -err); - trace_svcsock_accept_err(xprt, serv->sv_name, err); + if (err != -EAGAIN) + trace_svcsock_accept_err(xprt, serv->sv_name, err); return NULL; } if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL))) -- cgit v1.2.3 From c42bebca967da88c054ccb5ab152c9822c054662 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:33:00 -0400 Subject: SUNRPC: Trace struct svc_sock lifetime events Capture a timestamp and pointer address during the creation and destruction of struct svc_sock to record its lifetime. This helps to diagnose transport reference counting issues. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 39 +++++++++++++++++++++++++++------------ net/sunrpc/svcsock.c | 4 +++- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 31bc7025cb44..69e42ef30979 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2104,31 +2104,46 @@ DEFINE_SVC_DEFERRED_EVENT(drop); DEFINE_SVC_DEFERRED_EVENT(queue); DEFINE_SVC_DEFERRED_EVENT(recv); -TRACE_EVENT(svcsock_new_socket, +DECLARE_EVENT_CLASS(svcsock_lifetime_class, TP_PROTO( + const void *svsk, const struct socket *socket ), - - TP_ARGS(socket), - + TP_ARGS(svsk, socket), TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(const void *, svsk) + __field(const void *, sk) __field(unsigned long, type) __field(unsigned long, family) - __field(bool, listener) + __field(unsigned long, state) ), - TP_fast_assign( + struct sock *sk = socket->sk; + + __entry->netns_ino = sock_net(sk)->ns.inum; + __entry->svsk = svsk; + __entry->sk = sk; __entry->type = socket->type; - __entry->family = socket->sk->sk_family; - __entry->listener = (socket->sk->sk_state == TCP_LISTEN); + __entry->family = sk->sk_family; + __entry->state = sk->sk_state; ), - - TP_printk("type=%s family=%s%s", - show_socket_type(__entry->type), + TP_printk("svsk=%p type=%s family=%s%s", + __entry->svsk, show_socket_type(__entry->type), rpc_show_address_family(__entry->family), - __entry->listener ? " (listener)" : "" + __entry->state == TCP_LISTEN ? " (listener)" : "" ) ); +#define DEFINE_SVCSOCK_LIFETIME_EVENT(name) \ + DEFINE_EVENT(svcsock_lifetime_class, name, \ + TP_PROTO( \ + const void *svsk, \ + const struct socket *socket \ + ), \ + TP_ARGS(svsk, socket)) + +DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_new); +DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_free); TRACE_EVENT(svcsock_marker, TP_PROTO( diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index fbe33c922882..5f519fc0541b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1470,7 +1470,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - trace_svcsock_new_socket(sock); + trace_svcsock_new(svsk, sock); return svsk; } @@ -1651,6 +1651,8 @@ static void svc_sock_free(struct svc_xprt *xprt) struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct socket *sock = svsk->sk_sock; + trace_svcsock_free(svsk, sock); + tls_handshake_cancel(sock->sk); if (sock->file) sockfd_put(sock); -- cgit v1.2.3 From 442a629009818de2f8d9731cafb96e111ca8e4e6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:35:38 -0400 Subject: NFSD: Clean up nfsctl white-space damage Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b4fd7a7062d5..2ecea3b318eb 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -324,7 +324,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) len = qword_get(&mesg, dname, size); if (len <= 0) return -EINVAL; - + path = dname+len+1; len = qword_get(&mesg, path, size); if (len <= 0) @@ -338,7 +338,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) return -EINVAL; maxsize = min(maxsize, NFS3_FHSIZE); - if (qword_get(&mesg, mesg, size)>0) + if (qword_get(&mesg, mesg, size) > 0) return -EINVAL; /* we have all the words, they are in buf.. */ @@ -346,7 +346,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(netns(file), dom, path, &fh, maxsize); + len = exp_rootfh(netns(file), dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -418,8 +418,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) * OR * * Input: - * buf: C string containing whitespace- - * separated unsigned integer values + * buf: C string containing whitespace- + * separated unsigned integer values * representing the number of NFSD * threads to start in each pool * size: non-zero length of C string in @buf @@ -526,7 +526,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) char *sep; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); - if (size>0) { + if (size > 0) { if (nn->nfsd_serv) /* Cannot change versions without updating * nn->nfsd_serv->sv_xdrsize, and reallocing @@ -637,11 +637,11 @@ out: * OR * * Input: - * buf: C string containing whitespace- - * separated positive or negative - * integer values representing NFS - * protocol versions to enable ("+n") - * or disable ("-n") + * buf: C string containing whitespace- + * separated positive or negative + * integer values representing NFS + * protocol versions to enable ("+n") + * or disable ("-n") * size: non-zero length of C string in @buf * Output: * On success: status of zero or more protocol versions has @@ -705,7 +705,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred } /* - * A transport listener is added by writing it's transport name and + * A transport listener is added by writing its transport name and * a port number. */ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred) @@ -832,9 +832,9 @@ int nfsd_max_blksize; * OR * * Input: - * buf: C string containing an unsigned - * integer value representing the new - * NFS blksize + * buf: C string containing an unsigned + * integer value representing the new + * NFS blksize * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string @@ -881,9 +881,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) * OR * * Input: - * buf: C string containing an unsigned - * integer value representing the new - * number of max connections + * buf: C string containing an unsigned + * integer value representing the new + * number of max connections * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string @@ -1065,7 +1065,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) * OR * * Input: - * buf: any value + * buf: any value * size: non-zero length of C string in @buf * Output: * passed-in buffer filled with "Y" or "N" with a newline -- cgit v1.2.3 From 3434d7aa77d24c5c4b3d4385084cfdb607f05dec Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:35:44 -0400 Subject: NFSD: Clean up nfsctl_transaction_write() For easier readability, follow the common convention: if (error) handle_error; continue_normally; No behavior change is expected. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2ecea3b318eb..df0aae211a49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -109,12 +109,12 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu if (IS_ERR(data)) return PTR_ERR(data); - rv = write_op[ino](file, data, size); - if (rv >= 0) { - simple_transaction_set(file, rv); - rv = size; - } - return rv; + rv = write_op[ino](file, data, size); + if (rv < 0) + return rv; + + simple_transaction_set(file, rv); + return size; } static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) -- cgit v1.2.3 From 39d432fc76301cf0a0c454022117601994ca9397 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:35:50 -0400 Subject: NFSD: trace nfsctl operations Add trace log eye-catchers that record the arguments used to configure NFSD. This helps when troubleshooting the NFSD administrative interfaces. These tracepoints can capture NFSD start-up and shutdown times and parameters, changes in lease time and thread count, and a request to end the namespace's NFSv4 grace period, in addition to the set of NFS versions that are enabled. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 33 +++++-- fs/nfsd/trace.h | 259 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 284 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index df0aae211a49..1489e0b703b4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,6 +25,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "trace.h" /* * We have a single directory with several nodes in it. @@ -230,6 +231,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; + trace_nfsd_ctl_unlock_ip(net, buf); return nlmsvc_unlock_all_by_ip(sap); } @@ -263,7 +265,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) fo_path = buf; if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - + trace_nfsd_ctl_unlock_fs(netns(file), fo_path); error = kern_path(fo_path, 0, &path); if (error) return error; @@ -341,6 +343,8 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (qword_get(&mesg, mesg, size) > 0) return -EINVAL; + trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize); + /* we have all the words, they are in buf.. */ dom = unix_domain_find(dname); if (!dom) @@ -399,6 +403,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return rv; if (newthreads < 0) return -EINVAL; + trace_nfsd_ctl_threads(net, newthreads); rv = nfsd_svc(newthreads, net, file->f_cred); if (rv < 0) return rv; @@ -471,6 +476,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) rv = -EINVAL; if (nthreads[i] < 0) goto out_free; + trace_nfsd_ctl_pool_threads(net, i, nthreads[i]); } rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) @@ -536,6 +542,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) if (buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; + trace_nfsd_ctl_version(netns(file), buf); vers = mesg; len = qword_get(&mesg, vers, size); @@ -689,6 +696,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; + trace_nfsd_ctl_ports_addfd(net, fd); err = nfsd_create_serv(net); if (err != 0) @@ -720,6 +728,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr if (port < 1 || port > USHRT_MAX) return -EINVAL; + trace_nfsd_ctl_ports_addxprt(net, transport, port); err = nfsd_create_serv(net); if (err != 0) @@ -853,6 +862,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) int rv = get_int(&mesg, &bsize); if (rv) return rv; + trace_nfsd_ctl_maxblksize(netns(file), bsize); + /* force bsize into allowed range and * required alignment. */ @@ -903,6 +914,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) if (rv) return rv; + trace_nfsd_ctl_maxconn(netns(file), maxconn); nn->max_connections = maxconn; } @@ -913,6 +925,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) { + struct dentry *dentry = file_dentry(file); char *mesg = buf; int rv, i; @@ -922,6 +935,9 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, rv = get_int(&mesg, &i); if (rv) return rv; + trace_nfsd_ctl_time(netns(file), dentry->d_name.name, + dentry->d_name.len, i); + /* * Some sanity checking. We don't have a reason for * these particular numbers, but problems with the @@ -1014,6 +1030,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, len = qword_get(&mesg, recdir, size); if (len <= 0) return -EINVAL; + trace_nfsd_ctl_recoverydir(netns(file), recdir); status = nfs4_reset_recoverydir(recdir); if (status) @@ -1087,7 +1104,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) case '1': if (!nn->nfsd_serv) return -EBUSY; - nfsd4_end_grace(nn); + trace_nfsd_end_grace(netns(file)); break; default: return -EINVAL; @@ -1192,8 +1209,8 @@ static int __nfsd_symlink(struct inode *dir, struct dentry *dentry, * @content is assumed to be a NUL-terminated string that lives * longer than the symlink itself. */ -static void nfsd_symlink(struct dentry *parent, const char *name, - const char *content) +static void _nfsd_symlink(struct dentry *parent, const char *name, + const char *content) { struct inode *dir = parent->d_inode; struct dentry *dentry; @@ -1210,8 +1227,8 @@ out: inode_unlock(dir); } #else -static inline void nfsd_symlink(struct dentry *parent, const char *name, - const char *content) +static inline void _nfsd_symlink(struct dentry *parent, const char *name, + const char *content) { } @@ -1389,8 +1406,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) ret = simple_fill_super(sb, 0x6e667364, nfsd_files); if (ret) return ret; - nfsd_symlink(sb->s_root, "supported_krb5_enctypes", - "/proc/net/rpc/gss_krb5_enctypes"); + _nfsd_symlink(sb->s_root, "supported_krb5_enctypes", + "/proc/net/rpc/gss_krb5_enctypes"); dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 72a906a053dc..2af74983f146 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1581,6 +1581,265 @@ TRACE_EVENT(nfsd_cb_recall_any_done, ) ); +TRACE_EVENT(nfsd_ctl_unlock_ip, + TP_PROTO( + const struct net *net, + const char *address + ), + TP_ARGS(net, address), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(address, address) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(address, address); + ), + TP_printk("address=%s", + __get_str(address) + ) +); + +TRACE_EVENT(nfsd_ctl_unlock_fs, + TP_PROTO( + const struct net *net, + const char *path + ), + TP_ARGS(net, path), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(path, path) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(path, path); + ), + TP_printk("path=%s", + __get_str(path) + ) +); + +TRACE_EVENT(nfsd_ctl_filehandle, + TP_PROTO( + const struct net *net, + const char *domain, + const char *path, + int maxsize + ), + TP_ARGS(net, domain, path, maxsize), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, maxsize) + __string(domain, domain) + __string(path, path) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->maxsize = maxsize; + __assign_str(domain, domain); + __assign_str(path, path); + ), + TP_printk("domain=%s path=%s maxsize=%d", + __get_str(domain), __get_str(path), __entry->maxsize + ) +); + +TRACE_EVENT(nfsd_ctl_threads, + TP_PROTO( + const struct net *net, + int newthreads + ), + TP_ARGS(net, newthreads), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, newthreads) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->newthreads = newthreads; + ), + TP_printk("newthreads=%d", + __entry->newthreads + ) +); + +TRACE_EVENT(nfsd_ctl_pool_threads, + TP_PROTO( + const struct net *net, + int pool, + int nrthreads + ), + TP_ARGS(net, pool, nrthreads), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, pool) + __field(int, nrthreads) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->pool = pool; + __entry->nrthreads = nrthreads; + ), + TP_printk("pool=%d nrthreads=%d", + __entry->pool, __entry->nrthreads + ) +); + +TRACE_EVENT(nfsd_ctl_version, + TP_PROTO( + const struct net *net, + const char *mesg + ), + TP_ARGS(net, mesg), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(mesg, mesg) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(mesg, mesg); + ), + TP_printk("%s", + __get_str(mesg) + ) +); + +TRACE_EVENT(nfsd_ctl_ports_addfd, + TP_PROTO( + const struct net *net, + int fd + ), + TP_ARGS(net, fd), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, fd) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->fd = fd; + ), + TP_printk("fd=%d", + __entry->fd + ) +); + +TRACE_EVENT(nfsd_ctl_ports_addxprt, + TP_PROTO( + const struct net *net, + const char *transport, + int port + ), + TP_ARGS(net, transport, port), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, port) + __string(transport, transport) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->port = port; + __assign_str(transport, transport); + ), + TP_printk("transport=%s port=%d", + __get_str(transport), __entry->port + ) +); + +TRACE_EVENT(nfsd_ctl_maxblksize, + TP_PROTO( + const struct net *net, + int bsize + ), + TP_ARGS(net, bsize), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, bsize) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->bsize = bsize; + ), + TP_printk("bsize=%d", + __entry->bsize + ) +); + +TRACE_EVENT(nfsd_ctl_maxconn, + TP_PROTO( + const struct net *net, + int maxconn + ), + TP_ARGS(net, maxconn), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, maxconn) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->maxconn = maxconn; + ), + TP_printk("maxconn=%d", + __entry->maxconn + ) +); + +TRACE_EVENT(nfsd_ctl_time, + TP_PROTO( + const struct net *net, + const char *name, + size_t namelen, + int time + ), + TP_ARGS(net, name, namelen, time), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(int, time) + __string_len(name, name, namelen) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __entry->time = time; + __assign_str_len(name, name, namelen); + ), + TP_printk("file=%s time=%d\n", + __get_str(name), __entry->time + ) +); + +TRACE_EVENT(nfsd_ctl_recoverydir, + TP_PROTO( + const struct net *net, + const char *recdir + ), + TP_ARGS(net, recdir), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(recdir, recdir) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(recdir, recdir); + ), + TP_printk("recdir=%s", + __get_str(recdir) + ) +); + +TRACE_EVENT(nfsd_end_grace, + TP_PROTO( + const struct net *net + ), + TP_ARGS(net), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + ), + TP_printk("nn=%d", __entry->netns_ino + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 5f7fc5d69f6e92ec0b38774c387f5cf7812c5806 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:47:29 -0400 Subject: SUNRPC: Resupply rq_pages from node-local memory svc_init_buffer() is careful to allocate the initial set of server thread buffer pages from memory on the local NUMA node. svc_alloc_arg() should also be that careful. Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 42536a0b1db4..9ca3393a197e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -685,8 +685,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array(GFP_KERNEL, pages, - rqstp->rq_pages); + ret = alloc_pages_bulk_array_node(GFP_KERNEL, + rqstp->rq_pool->sp_id, + pages, rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; -- cgit v1.2.3 From 88e4d41a264d00fbfd344eb2485c1c59096197f4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 May 2023 09:47:36 -0400 Subject: SUNRPC: Use __alloc_bulk_pages() in svc_init_buffer() Clean up: Use the bulk page allocator when filling a server thread's buffer page array. Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 79967b6925bd..e6d4cec61e47 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -597,34 +597,25 @@ svc_destroy(struct kref *ref) } EXPORT_SYMBOL_GPL(svc_destroy); -/* - * Allocate an RPC server's buffer space. - * We allocate pages and place them in rq_pages. - */ -static int +static bool svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) { - unsigned int pages, arghi; + unsigned long pages, ret; /* bc_xprt uses fore channel allocated buffers */ if (svc_is_backchannel(rqstp)) - return 1; + return true; pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page */ - arghi = 0; WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); if (pages > RPCSVC_MAXPAGES) pages = RPCSVC_MAXPAGES; - while (pages) { - struct page *p = alloc_pages_node(node, GFP_KERNEL, 0); - if (!p) - break; - rqstp->rq_pages[arghi++] = p; - pages--; - } - return pages == 0; + + ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages, + rqstp->rq_pages); + return ret == pages; } /* -- cgit v1.2.3 From adaa7a50d027b91ee6d56ae8b0a3f6edf5a78776 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 16 May 2023 10:26:09 -0400 Subject: NFSD: Add encoders for NFSv4 clientids and verifiers Deduplicate some common code. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 107 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 76db2fe29624..1488ce978f7c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3688,6 +3688,30 @@ fail: return -EINVAL; } +static __be32 +nfsd4_encode_verifier4(struct xdr_stream *xdr, const nfs4_verifier *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + memcpy(p, verf->data, sizeof(verf->data)); + return nfs_ok; +} + +static __be32 +nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(__be64)); + if (!p) + return nfserr_resource; + memcpy(p, clientid, sizeof(*clientid)); + return nfs_ok; +} + static __be32 nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) { @@ -3752,15 +3776,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_commit *commit = &u->commit; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, commit->co_verf.data, - NFS4_VERIFIER_SIZE); - return 0; + return nfsd4_encode_verifier4(resp->xdr, &commit->co_verf); } static __be32 @@ -4213,15 +4230,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, int starting_len = xdr->buf->len; __be32 *p; - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - - /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); - xdr->buf->head[0].iov_len = (char *)xdr->p - - (char *)xdr->buf->head[0].iov_base; + nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf); + if (nfserr != nfs_ok) + return nfserr; /* * Number of bytes left for directory entries allowing for the @@ -4448,23 +4459,25 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_setclientid *scd = &u->setclientid; struct xdr_stream *xdr = resp->xdr; - __be32 *p; if (!nfserr) { - p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); - p = xdr_encode_opaque_fixed(p, &scd->se_confirm, - NFS4_VERIFIER_SIZE); - } - else if (nfserr == nfserr_clid_inuse) { - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(0); + nfserr = nfsd4_encode_clientid4(xdr, &scd->se_clientid); + if (nfserr != nfs_ok) + goto out; + nfserr = nfsd4_encode_verifier4(xdr, &scd->se_confirm); + } else if (nfserr == nfserr_clid_inuse) { + /* empty network id */ + if (xdr_stream_encode_u32(xdr, 0) < 0) { + nfserr = nfserr_resource; + goto out; + } + /* empty universal address */ + if (xdr_stream_encode_u32(xdr, 0) < 0) { + nfserr = nfserr_resource; + goto out; + } } +out: return nfserr; } @@ -4473,17 +4486,12 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_write *write = &u->write; - struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 16); - if (!p) + if (xdr_stream_encode_u32(resp->xdr, write->wr_bytes_written) < 0) return nfserr_resource; - *p++ = cpu_to_be32(write->wr_bytes_written); - *p++ = cpu_to_be32(write->wr_how_written); - p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, - NFS4_VERIFIER_SIZE); - return 0; + if (xdr_stream_encode_u32(resp->xdr, write->wr_how_written) < 0) + return nfserr_resource; + return nfsd4_encode_verifier4(resp->xdr, &write->wr_verifier); } static __be32 @@ -4505,20 +4513,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, server_scope = nn->nfsd_name; server_scope_sz = strlen(nn->nfsd_name); - p = xdr_reserve_space(xdr, - 8 /* eir_clientid */ + - 4 /* eir_sequenceid */ + - 4 /* eir_flags */ + - 4 /* spr_how */); - if (!p) + if (nfsd4_encode_clientid4(xdr, &exid->clientid) != nfs_ok) + return nfserr_resource; + if (xdr_stream_encode_u32(xdr, exid->seqid) < 0) + return nfserr_resource; + if (xdr_stream_encode_u32(xdr, exid->flags) < 0) return nfserr_resource; - p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); - *p++ = cpu_to_be32(exid->seqid); - *p++ = cpu_to_be32(exid->flags); - - *p++ = cpu_to_be32(exid->spa_how); - + if (xdr_stream_encode_u32(xdr, exid->spa_how) < 0) + return nfserr_resource; switch (exid->spa_how) { case SP4_NONE: break; -- cgit v1.2.3 From 66a21db7db59f279a9fcb6445fa36dc6eb001b3c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 16 May 2023 10:26:15 -0400 Subject: NFSD: Replace encode_cinfo() De-duplicate "reserve_space; encode_cinfo". Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 72 +++++++++++++++++++------------------------------------ 1 file changed, 24 insertions(+), 48 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1488ce978f7c..c5c6873b938d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2566,12 +2566,16 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) return p; } -static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) +static __be32 +nfsd4_encode_change_info4(struct xdr_stream *xdr, struct nfsd4_change_info *c) { - *p++ = cpu_to_be32(c->atomic); - p = xdr_encode_hyper(p, c->before_change); - p = xdr_encode_hyper(p, c->after_change); - return p; + if (xdr_stream_encode_bool(xdr, c->atomic) < 0) + return nfserr_resource; + if (xdr_stream_encode_u64(xdr, c->before_change) < 0) + return nfserr_resource; + if (xdr_stream_encode_u64(xdr, c->after_change) < 0) + return nfserr_resource; + return nfs_ok; } /* Encode as an array of strings the string given with components @@ -3786,12 +3790,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - encode_cinfo(p, &create->cr_cinfo); + nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo); + if (nfserr) + return nfserr; return nfsd4_encode_bitmap(xdr, create->cr_bmval[0], create->cr_bmval[1], create->cr_bmval[2]); } @@ -3909,13 +3911,8 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_link *link = &u->link; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &link->li_cinfo); - return 0; + return nfsd4_encode_change_info4(xdr, &link->li_cinfo); } @@ -3930,11 +3927,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); if (nfserr) return nfserr; - p = xdr_reserve_space(xdr, 24); - if (!p) + nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo); + if (nfserr) + return nfserr; + if (xdr_stream_encode_u32(xdr, open->op_rflags) < 0) return nfserr_resource; - p = encode_cinfo(p, &open->op_cinfo); - *p++ = cpu_to_be32(open->op_rflags); nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], open->op_bmval[2]); @@ -4310,13 +4307,8 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_remove *remove = &u->remove; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &remove->rm_cinfo); - return 0; + return nfsd4_encode_change_info4(xdr, &remove->rm_cinfo); } static __be32 @@ -4325,14 +4317,11 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_rename *rename = &u->rename; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 40); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &rename->rn_sinfo); - p = encode_cinfo(p, &rename->rn_tinfo); - return 0; + nfserr = nfsd4_encode_change_info4(xdr, &rename->rn_sinfo); + if (nfserr) + return nfserr; + return nfsd4_encode_change_info4(xdr, &rename->rn_tinfo); } static __be32 @@ -5102,15 +5091,8 @@ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_setxattr *setxattr = &u->setxattr; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - encode_cinfo(p, &setxattr->setxa_cinfo); - - return 0; + return nfsd4_encode_change_info4(xdr, &setxattr->setxa_cinfo); } /* @@ -5256,14 +5238,8 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, { struct nfsd4_removexattr *removexattr = &u->removexattr; struct xdr_stream *xdr = resp->xdr; - __be32 *p; - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - - p = encode_cinfo(p, &removexattr->rmxa_cinfo); - return 0; + return nfsd4_encode_change_info4(xdr, &removexattr->rmxa_cinfo); } typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); -- cgit v1.2.3 From 82078b9895bd46ce69b4a73e2da40e7e2202fdb5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:36 -0400 Subject: NFSD: Ensure that xdr_write_pages updates rq_next_page All other NFSv[23] procedures manage to keep page_ptr and rq_next_page in lock step. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 11 +++++++---- fs/nfsd/nfsxdr.c | 11 +++++++---- include/linux/sunrpc/svc.h | 21 +++++++++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 3308dd671ef0..f32128955ec8 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -828,7 +828,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (xdr_stream_encode_u32(xdr, resp->len) < 0) return false; - xdr_write_pages(xdr, resp->pages, 0, resp->len); + svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0, + resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) return false; break; @@ -859,8 +860,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) return false; - xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, - resp->count); + svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, + rqstp->rq_res.page_base, + resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) return false; break; @@ -961,7 +963,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) return false; - xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0, + dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) return false; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index caf6355b18fa..5777f40c7353 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -468,7 +468,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) case nfs_ok: if (xdr_stream_encode_u32(xdr, resp->len) < 0) return false; - xdr_write_pages(xdr, &resp->page, 0, resp->len); + svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0, + resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) return false; break; @@ -491,8 +492,9 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) return false; - xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, - resp->count); + svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, + rqstp->rq_res.page_base, + resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) return false; break; @@ -511,7 +513,8 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) return false; switch (resp->status) { case nfs_ok: - xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0, + dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) return false; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 762d7231e574..3b10636c51a9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -508,6 +508,27 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) xdr->rqst = NULL; } +/** + * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream + * @xdr: xdr_stream to be updated + * @pages: array of pages to insert + * @base: starting offset of first data byte in @pages + * @len: number of data bytes in @pages to insert + * + * After the @pages are added, the tail iovec is instantiated pointing + * to end of the head buffer, and the stream is set up to encode + * subsequent items into the tail. + */ +static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct page **pages, + unsigned int base, + unsigned int len) +{ + xdr_write_pages(xdr, pages, base, len); + xdr->page_ptr = rqstp->rq_next_page - 1; +} + /** * svcxdr_set_auth_slack - * @rqstp: RPC transaction -- cgit v1.2.3 From ba21e20b309564c64761f4953db4456ec8c4e49c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:43 -0400 Subject: NFSD: Use svcxdr_encode_opaque_pages() in nfsd4_encode_splice_read() Commit 15b23ef5d348 ("nfsd4: fix corruption of NFSv4 read data") encountered exactly the same issue: after a splice read, a filesystem-owned page is left in rq_pages[]; the symptoms are the same as described there. If the computed number of pages in nfsd4_encode_splice_read() is not exactly the same as the actual number of pages that were consumed by nfsd_splice_actor() (say, because of a bug) then hilarity ensues. Instead of recomputing the page offset based on the size of the payload, use rq_next_page, which is already properly updated by nfsd_splice_actor(), to cause svc_rqst_release_pages() to operate correctly in every instance. This is a defensive change since we believe that after commit 27c934dd8832 ("nfsd: don't replace page in rq_pages if it's a continuation of last page") has been applied, there are no known opportunities for nfsd_splice_actor() to screw up. So I'm not marking it for stable backport. Reported-by: Andy Zlotek Suggested-by: Calum Mackay Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c5c6873b938d..b78c2391a2a1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4032,6 +4032,11 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, return nfsd4_encode_stateid(xdr, &od->od_stateid); } +/* + * The operation of this function assumes that this is the only + * READ operation in the COMPOUND. If there are multiple READs, + * we use nfsd4_encode_readv(). + */ static __be32 nfsd4_encode_splice_read( struct nfsd4_compoundres *resp, struct nfsd4_read *read, @@ -4042,8 +4047,12 @@ static __be32 nfsd4_encode_splice_read( int status, space_left; __be32 nfserr; - /* Make sure there will be room for padding if needed */ - if (xdr->end - xdr->p < 1) + /* + * Make sure there is room at the end of buf->head for + * svcxdr_encode_opaque_pages() to create a tail buffer + * to XDR-pad the payload. + */ + if (xdr->iov != xdr->buf->head || xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, @@ -4052,6 +4061,8 @@ static __be32 nfsd4_encode_splice_read( read->rd_length = maxcount; if (nfserr) goto out_err; + svcxdr_encode_opaque_pages(read->rd_rqstp, xdr, buf->pages, + buf->page_base, maxcount); status = svc_encode_result_payload(read->rd_rqstp, buf->head[0].iov_len, maxcount); if (status) { @@ -4059,31 +4070,19 @@ static __be32 nfsd4_encode_splice_read( goto out_err; } - buf->page_len = maxcount; - buf->len += maxcount; - xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) - / PAGE_SIZE; - - /* Use rest of head for padding and remaining ops: */ - buf->tail[0].iov_base = xdr->p; - buf->tail[0].iov_len = 0; - xdr->iov = buf->tail; - if (maxcount&3) { - int pad = 4 - (maxcount&3); - - *(xdr->p++) = 0; - - buf->tail[0].iov_base += maxcount&3; - buf->tail[0].iov_len = pad; - buf->len += pad; - } - + /* + * Prepare to encode subsequent operations. + * + * xdr_truncate_encode() is not safe to use after a successful + * splice read has been done, so the following stream + * manipulations are open-coded. + */ space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, buf->buflen - buf->len); buf->buflen = buf->len + space_left; xdr->end = (__be32 *)((void *)xdr->end + space_left); - return 0; + return nfs_ok; out_err: /* -- cgit v1.2.3 From ed4a567a179ec15c15f78fa60ca6de9cc4f34897 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:50 -0400 Subject: NFSD: Update rq_next_page between COMPOUND operations A GETATTR with a large result can advance xdr->page_ptr without updating rq_next_page. If a splice READ follows that GETATTR in the COMPOUND, nfsd_splice_actor can start splicing at the wrong page. I've also seen READLINK and READDIR leave rq_next_page in an unmodified state. There are potentially a myriad of combinations like this, so play it safe: move the rq_next_page update to nfsd4_encode_operation. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b78c2391a2a1..3887da048232 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5438,6 +5438,12 @@ status: release: if (opdesc && opdesc->op_release) opdesc->op_release(&op->u); + + /* + * Account for pages consumed while encoding this operation. + * The xdr_stream primitives don't manage rq_next_page. + */ + rqstp->rq_next_page = xdr->page_ptr + 1; } /* @@ -5506,9 +5512,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) p = resp->statusp; *p++ = resp->cstate.status; - - rqstp->rq_next_page = xdr->page_ptr + 1; - *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); -- cgit v1.2.3 From 507df40ebf31655203dd05e6e31db5849a83347a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:45:56 -0400 Subject: NFSD: Hoist rq_vec preparation into nfsd_read() Accrue the following benefits: a) Deduplicate this common bit of code. b) Don't prepare rq_vec for NFSv2 and NFSv3 spliced reads, which don't use rq_vec. This is already the case for nfsd4_encode_read(). c) Eventually, converting NFSD's read path to use a bvec iterator will be simpler. In the next patch, nfsd_iter_read() will replace nfsd_readv() for all NFS versions. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 14 +---------- fs/nfsd/nfsproc.c | 14 +---------- fs/nfsd/vfs.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++------- fs/nfsd/vfs.h | 8 +++++-- 4 files changed, 68 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index e6bb8eeb5bc2..fc8d5b7db9f8 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -151,8 +151,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp) { struct nfsd3_readargs *argp = rqstp->rq_argp; struct nfsd3_readres *resp = rqstp->rq_resp; - unsigned int len; - int v; dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), @@ -166,17 +164,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp) if (argp->offset + argp->count > (u64)OFFSET_MAX) argp->count = (u64)OFFSET_MAX - argp->offset; - v = 0; - len = argp->count; resp->pages = rqstp->rq_next_page; - while (len > 0) { - struct page *page = *(rqstp->rq_next_page++); - - rqstp->rq_vec[v].iov_base = page_address(page); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } /* Obtain buffer pointer for payload. * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) @@ -187,7 +175,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, v, &resp->count, &resp->eof); + &resp->count, &resp->eof); return rpc_success; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c37195572fd0..a7315928a760 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -176,9 +176,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) { struct nfsd_readargs *argp = rqstp->rq_argp; struct nfsd_readres *resp = rqstp->rq_resp; - unsigned int len; u32 eof; - int v; dprintk("nfsd: READ %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -187,17 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); - v = 0; - len = argp->count; resp->pages = rqstp->rq_next_page; - while (len > 0) { - struct page *page = *(rqstp->rq_next_page++); - - rqstp->rq_vec[v].iov_base = page_address(page); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } /* Obtain buffer pointer for payload. 19 is 1 word for * status, 17 words for fattr, and 1 word for the byte count. @@ -207,7 +195,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) resp->count = argp->count; fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, v, &resp->count, &eof); + &resp->count, &eof); if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0016bcc04a59..d9055b5c496b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1036,6 +1036,50 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } +/** + * nfsd_iter_read - Perform a VFS read using an iterator + * @rqstp: RPC transaction context + * @fhp: file handle of file to be read + * @file: opened struct file of file to be read + * @offset: starting byte offset + * @count: IN: requested number of bytes; OUT: number of bytes read + * @base: offset in first page of read buffer + * @eof: OUT: set non-zero if operation reached the end of the file + * + * Some filesystems or situations cannot use nfsd_splice_read. This + * function is the slightly less-performant fallback for those cases. + * + * Returns nfs_ok on success, otherwise an nfserr stat value is + * returned. + */ +__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, unsigned long *count, + unsigned int base, u32 *eof) +{ + unsigned long v, total; + struct iov_iter iter; + loff_t ppos = offset; + struct page *page; + ssize_t host_err; + + v = 0; + total = *count; + while (total) { + page = *(rqstp->rq_next_page++); + rqstp->rq_vec[v].iov_base = page_address(page) + base; + rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base); + total -= rqstp->rq_vec[v].iov_len; + ++v; + base = 0; + } + WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec)); + + trace_nfsd_read_vector(rqstp, fhp, offset, *count); + iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count); + host_err = vfs_iter_read(file, &iter, &ppos, 0); + return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); +} + /* * Gathered writes: If another process is currently writing to the file, * there's a high chance this is another nfsd (triggered by a bulk write @@ -1161,14 +1205,24 @@ out_nfserr: return nfserr; } -/* - * Read data from a file. count must contain the requested read count - * on entry. On return, *count contains the number of bytes actually read. +/** + * nfsd_read - Read data from a file + * @rqstp: RPC transaction context + * @fhp: file handle of file to be read + * @offset: starting byte offset + * @count: IN: requested number of bytes; OUT: number of bytes read + * @eof: OUT: set non-zero if operation reached the end of the file + * + * The caller must verify that there is enough space in @rqstp.rq_res + * to perform this operation. + * * N.B. After this call fhp needs an fh_put + * + * Returns nfs_ok on success, otherwise an nfserr stat value is + * returned. */ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count, - u32 *eof) + loff_t offset, unsigned long *count, u32 *eof) { struct nfsd_file *nf; struct file *file; @@ -1183,12 +1237,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags)) err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof); else - err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof); + err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof); nfsd_file_put(nf); - trace_nfsd_read_done(rqstp, fhp, offset, *count); - return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 43fb57a301d3..6381a2890b0b 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -115,8 +115,12 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kvec *vec, int vlen, unsigned long *count, u32 *eof); -__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, - loff_t, struct kvec *, int, unsigned long *, +__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file *file, loff_t offset, + unsigned long *count, unsigned int base, + u32 *eof); +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, unsigned long *count, u32 *eof); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *, -- cgit v1.2.3 From 703d7521555504b3a316b105b4806d641b7ebc76 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:46:03 -0400 Subject: NFSD: Hoist rq_vec preparation into nfsd_read() [step two] Now that the preparation of an rq_vec has been removed from the generic read path, nfsd_splice_read() no longer needs to reset rq_next_page. nfsd4_encode_read() calls nfsd_splice_read() directly. As far as I can ascertain, resetting rq_next_page for NFSv4 splice reads is unnecessary because rq_next_page is already set correctly. Moreover, resetting it might even be incorrect if previous operations in the COMPOUND have already consumed at least a page of the send buffer. I would expect that the result would be encoding the READ payload over previously-encoded results. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 10 +++++----- fs/nfsd/vfs.c | 13 ++++++++++++- include/linux/sunrpc/xdr.h | 3 +-- net/sunrpc/xdr.c | 26 ++++++++++++-------------- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3887da048232..c67bc904c7b7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4103,13 +4103,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, __be32 zero = xdr_zero; __be32 nfserr; - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); - if (read->rd_vlen < 0) + if (xdr_reserve_space_vec(xdr, maxcount) < 0) return nfserr_resource; - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, - resp->rqstp->rq_vec, read->rd_vlen, &maxcount, - &read->rd_eof); + nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file, + read->rd_offset, &maxcount, + xdr->buf->page_len & ~PAGE_MASK, + &read->rd_eof); read->rd_length = maxcount; if (nfserr) return nfserr; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d9055b5c496b..37febe1ff039 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1003,6 +1003,18 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, } } +/** + * nfsd_splice_read - Perform a VFS read using a splice pipe + * @rqstp: RPC transaction context + * @fhp: file handle of file to be read + * @file: opened struct file of file to be read + * @offset: starting byte offset + * @count: IN: requested number of bytes; OUT: number of bytes read + * @eof: OUT: set non-zero if operation reached the end of the file + * + * Returns nfs_ok on success, otherwise an nfserr stat value is + * returned. + */ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, u32 *eof) @@ -1016,7 +1028,6 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ssize_t host_err; trace_nfsd_read_splice(rqstp, fhp, offset, *count); - rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 72014c9216fc..f89ec4b5ea16 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -242,8 +242,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); -extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, - size_t nbytes); +extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes); extern void __xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 36835b2f5446..2a22e78af116 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_reserve_space); - /** * xdr_reserve_space_vec - Reserves a large amount of buffer space for sending * @xdr: pointer to xdr_stream - * @vec: pointer to a kvec array * @nbytes: number of bytes to reserve * - * Reserves enough buffer space to encode 'nbytes' of data and stores the - * pointers in 'vec'. The size argument passed to xdr_reserve_space() is - * determined based on the number of bytes remaining in the current page to - * avoid invalidating iov_base pointers when xdr_commit_encode() is called. + * The size argument passed to xdr_reserve_space() is determined based + * on the number of bytes remaining in the current page to avoid + * invalidating iov_base pointers when xdr_commit_encode() is called. + * + * Return values: + * %0: success + * %-EMSGSIZE: not enough space is available in @xdr */ -int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes) +int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes) { - int thislen; - int v = 0; + size_t thislen; __be32 *p; /* @@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbyte xdr->end = xdr->p; } + /* XXX: Let's find a way to make this more efficient */ while (nbytes) { thislen = xdr->buf->page_len % PAGE_SIZE; thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen); p = xdr_reserve_space(xdr, thislen); if (!p) - return -EIO; + return -EMSGSIZE; - vec[v].iov_base = p; - vec[v].iov_len = thislen; - v++; nbytes -= thislen; } - return v; + return 0; } EXPORT_SYMBOL_GPL(xdr_reserve_space_vec); -- cgit v1.2.3 From df56b384de521236c261bf34856dd0d3ba772850 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 May 2023 13:46:09 -0400 Subject: NFSD: Remove nfsd_readv() nfsd_readv()'s consumers now use nfsd_iter_read(). Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 15 --------------- fs/nfsd/vfs.h | 5 ----- 2 files changed, 20 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 37febe1ff039..59b7d60ae33e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1032,21 +1032,6 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } -__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, - struct kvec *vec, int vlen, unsigned long *count, - u32 *eof) -{ - struct iov_iter iter; - loff_t ppos = offset; - ssize_t host_err; - - trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count); - host_err = vfs_iter_read(file, &iter, &ppos, 0); - return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); -} - /** * nfsd_iter_read - Perform a VFS read using an iterator * @rqstp: RPC transaction context diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 6381a2890b0b..a6890ea7b765 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -110,11 +110,6 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, u32 *eof); -__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, - struct kvec *vec, int vlen, - unsigned long *count, - u32 *eof); __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, unsigned int base, -- cgit v1.2.3 From 518f375c15af724cd89a4ec888dea942bb27f77f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 19 May 2023 07:17:23 -0400 Subject: nfsd: don't provide pre/post-op attrs if fh_getattr fails nfsd calls fh_getattr to get the latest inode attrs for pre/post-op info. In the event that fh_getattr fails, it resorts to scraping cached values out of the inode directly. Since these attributes are optional, we can just skip providing them altogether when this happens. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Reviewed-by: Neil Brown --- fs/nfsd/nfsfh.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ccd8485fee04..e8e13ae72e3c 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -623,16 +623,9 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); - if (err) { - /* Grab the times from inode anyway */ - stat.mtime = inode->i_mtime; - stat.ctime = inode->i_ctime; - stat.size = inode->i_size; - if (v4 && IS_I_VERSION(inode)) { - stat.change_cookie = inode_query_iversion(inode); - stat.result_mask |= STATX_CHANGE_COOKIE; - } - } + if (err) + return; + if (v4) fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); @@ -660,15 +653,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp) printk("nfsd: inode locked twice during operation.\n"); err = fh_getattr(fhp, &fhp->fh_post_attr); - if (err) { - fhp->fh_post_saved = false; - fhp->fh_post_attr.ctime = inode->i_ctime; - if (v4 && IS_I_VERSION(inode)) { - fhp->fh_post_attr.change_cookie = inode_query_iversion(inode); - fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE; - } - } else - fhp->fh_post_saved = true; + if (err) + return; + + fhp->fh_post_saved = true; if (v4) fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, inode); -- cgit v1.2.3 From 665e89ab7c5af1f2d260834c861a74b01a30f95f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 3 Jun 2023 07:14:14 +1000 Subject: lockd: drop inappropriate svc_get() from locked_get() The below-mentioned patch was intended to simplify refcounting on the svc_serv used by locked. The goal was to only ever have a single reference from the single thread. To that end we dropped a call to lockd_start_svc() (except when creating thread) which would take a reference, and dropped the svc_put(serv) that would drop that reference. Unfortunately we didn't also remove the svc_get() from lockd_create_svc() in the case where the svc_serv already existed. So after the patch: - on the first call the svc_serv was allocated and the one reference was given to the thread, so there are no extra references - on subsequent calls svc_get() was called so there is now an extra reference. This is clearly not consistent. The inconsistency is also clear in the current code in lockd_get() takes *two* references, one on nlmsvc_serv and one by incrementing nlmsvc_users. This clearly does not match lockd_put(). So: drop that svc_get() from lockd_get() (which used to be in lockd_create_svc(). Reported-by: Ido Schimmel Closes: https://lore.kernel.org/linux-nfs/ZHsI%2FH16VX9kJQX1@shredder/T/#u Fixes: b73a2972041b ("lockd: move lockd_start_svc() call into lockd_create_svc()") Signed-off-by: NeilBrown Tested-by: Ido Schimmel Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 04ba95b83d16..22d3ff3818f5 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -355,7 +355,6 @@ static int lockd_get(void) int error; if (nlmsvc_serv) { - svc_get(nlmsvc_serv); nlmsvc_users++; return 0; } -- cgit v1.2.3 From fe2b401e55482cf90a0056209c8a232b2d39056c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Jun 2023 09:11:24 -0400 Subject: svcrdma: Allocate new transports on device's NUMA node The physical device's NUMA node ID is available when allocating an svc_xprt for an incoming connection. Use that value to ensure the svc_xprt structure is allocated on the NUMA node closest to the device. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ca04f7a6a085..2abd895046ee 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -64,7 +64,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, - struct net *net); + struct net *net, int node); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context) } static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, - struct net *net) + struct net *net, int node) { - struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); + struct svcxprt_rdma *cma_xprt; - if (!cma_xprt) { - dprintk("svcrdma: failed to create new transport\n"); + cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node); + if (!cma_xprt) return NULL; - } + svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); @@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, struct svcxprt_rdma *newxprt; struct sockaddr *sa; - /* Create a new transport */ newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, - listen_xprt->sc_xprt.xpt_net); + listen_xprt->sc_xprt.xpt_net, + ibdev_to_node(new_cma_id->device)); if (!newxprt) return; newxprt->sc_cm_id = new_cma_id; @@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) return ERR_PTR(-EAFNOSUPPORT); - cma_xprt = svc_rdma_create_xprt(serv, net); + cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE); if (!cma_xprt) return ERR_PTR(-ENOMEM); set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); -- cgit v1.2.3 From c5d68d25bd6b5798bf0eb96661e1b26748e970d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Jun 2023 09:11:30 -0400 Subject: svcrdma: Clean up allocation of svc_rdma_recv_ctxt The physical device's favored NUMA node ID is available when allocating a recv_ctxt. Use that value instead of relying on the assumption that the memory allocation happens to be running on a node close to the device. This clean up eliminates the hack of destroying recv_ctxts that were not created by the receive CQ thread -- recv_ctxts are now always allocated on a "good" node. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 18 +++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index fbc4bd423b35..a0f3ea357977 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -135,7 +135,6 @@ struct svc_rdma_recv_ctxt { struct ib_sge rc_recv_sge; void *rc_recv_buf; struct xdr_stream rc_stream; - bool rc_temp; u32 rc_byte_len; unsigned int rc_page_count; u32 rc_inv_rkey; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index a22fe7587fa6..46a719ba4917 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, static struct svc_rdma_recv_ctxt * svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) { + int node = ibdev_to_node(rdma->sc_cm_id->device); struct svc_rdma_recv_ctxt *ctxt; dma_addr_t addr; void *buffer; - ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); + ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node); if (!ctxt) goto fail0; - buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); + buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail1; addr = ib_dma_map_single(rdma->sc_pd->device, buffer, @@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->rc_recv_sge.length = rdma->sc_max_req_size; ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey; ctxt->rc_recv_buf = buffer; - ctxt->rc_temp = false; return ctxt; fail2: @@ -232,10 +232,7 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, pcl_free(&ctxt->rc_write_pcl); pcl_free(&ctxt->rc_reply_pcl); - if (!ctxt->rc_temp) - llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); - else - svc_rdma_recv_ctxt_destroy(rdma, ctxt); + llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); } /** @@ -258,7 +255,7 @@ void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt) } static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, - unsigned int wanted, bool temp) + unsigned int wanted) { const struct ib_recv_wr *bad_wr = NULL; struct svc_rdma_recv_ctxt *ctxt; @@ -275,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, break; trace_svcrdma_post_recv(ctxt); - ctxt->rc_temp = temp; ctxt->rc_recv_wr.next = recv_chain; recv_chain = &ctxt->rc_recv_wr; rdma->sc_pending_recvs++; @@ -309,7 +305,7 @@ err_free: */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { - return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); + return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests); } /** @@ -343,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) * client reconnects. */ if (rdma->sc_pending_recvs < rdma->sc_max_requests) - if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) + if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch)) goto dropped; /* All wc fields are now known to be valid */ -- cgit v1.2.3 From ed51b426101427c90c14696f073ef8fe71a806f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Jun 2023 09:11:37 -0400 Subject: svcrdma: Clean up allocation of svc_rdma_send_ctxt The physical device's favored NUMA node ID is available when allocating a send_ctxt. Use that value instead of relying on the assumption that the memory allocation happens to be running on a node close to the device. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 22a871e6fe4d..a35d1e055b1a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, static struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) { + int node = ibdev_to_node(rdma->sc_cm_id->device); struct svc_rdma_send_ctxt *ctxt; dma_addr_t addr; void *buffer; - size_t size; int i; - size = sizeof(*ctxt); - size += rdma->sc_max_send_sges * sizeof(struct ib_sge); - ctxt = kmalloc(size, GFP_KERNEL); + ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges), + GFP_KERNEL, node); if (!ctxt) goto fail0; - buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); + buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail1; addr = ib_dma_map_single(rdma->sc_pd->device, buffer, -- cgit v1.2.3 From ac3c32bbdb0e3f787333ae67d650868b6ef5c9a7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Jun 2023 09:11:43 -0400 Subject: svcrdma: Clean up allocation of svc_rdma_rw_ctxt The physical device's favored NUMA node ID is available when allocating a rw_ctxt. Use that value instead of relying on the assumption that the memory allocation happens to be running on a node close to the device. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 11cf7c646644..068c365e7812 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) if (node) { ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node); } else { - ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), - GFP_KERNEL); + ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), + GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device)); if (!ctxt) goto out_noctx; @@ -234,7 +234,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, { struct svc_rdma_write_info *info; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kmalloc_node(sizeof(*info), GFP_KERNEL, + ibdev_to_node(rdma->sc_cm_id->device)); if (!info) return info; @@ -304,7 +305,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma) { struct svc_rdma_read_info *info; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kmalloc_node(sizeof(*info), GFP_KERNEL, + ibdev_to_node(rdma->sc_cm_id->device)); if (!info) return info; -- cgit v1.2.3 From b1c6ffb267285e20bfe53b7180e4ce785cab2a97 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Jun 2023 09:12:38 -0400 Subject: mailmap: Add Bruce Fields' latest e-mail addresses Ensure that Bruce's old e-mail addresses map to his current one so he doesn't miss out on all the fun. Signed-off-by: Chuck Lever --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index bf076bbc36b1..0f2458442ea2 100644 --- a/.mailmap +++ b/.mailmap @@ -181,6 +181,8 @@ Henrik Rydberg Herbert Xu Huacai Chen Huacai Chen +J. Bruce Fields +J. Bruce Fields Jacob Shin Jaegeuk Kim Jaegeuk Kim -- cgit v1.2.3 From 8111c17cfcb322d13622eb3fb1a5de5b9e6b163b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Jun 2023 09:12:45 -0400 Subject: NFSD: Add "official" reviewers for this subsystem At LFS 2023, it was suggested we should publicly document the name and email of reviewers who new contributors can trust. This also gives them some recognition for their work as reviewers. Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0dab9737ec16..4c85519d95b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11264,6 +11264,10 @@ W: http://kernelnewbies.org/KernelJanitors KERNEL NFSD, SUNRPC, AND LOCKD SERVERS M: Chuck Lever M: Jeff Layton +R: Neil Brown +R: Olga Kornievskaia +R: Dai Ngo +R: Tom Talpey L: linux-nfs@vger.kernel.org S: Supported W: http://nfs.sourceforge.net/ -- cgit v1.2.3 From 58f5d894006d82ed7335e1c37182fbc5f08c2f51 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Tue, 6 Jun 2023 16:41:02 -0700 Subject: NFSD: add encoding of op_recall flag for write delegation Modified nfsd4_encode_open to encode the op_recall flag properly for OPEN result with write delegation granted. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c67bc904c7b7..a615fe665381 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3970,7 +3970,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 32); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(open->op_recall); /* * TODO: space_limit's in delegations -- cgit v1.2.3 From 6be7afcd92cd426428b94dd7e258b2472a45cde7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:10:01 -0400 Subject: SUNRPC: Revert cc93ce9529a6 ("svcrdma: Retain the page backing rq_res.head[0].iov_base") Pre-requisite for releasing pages in the send completion handler. Reverted by hand: patch -R would not apply cleanly. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a35d1e055b1a..8e7ccef74207 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -975,11 +975,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) goto put_ctxt; - - /* Prevent svc_xprt_release() from releasing the page backing - * rq_res.head[0].iov_base. It's no longer being accessed by - * the I/O device. */ - rqstp->rq_respages++; return 0; reply_chunk: -- cgit v1.2.3 From a944209c11aff9a5c9b7987fc958cc2344dca51f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:10:07 -0400 Subject: SUNRPC: Revert 579900670ac7 ("svcrdma: Remove unused sc_pages field") Pre-requisite for releasing pages in the send completion handler. Reverted by hand: patch -R would not apply cleanly. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 ++- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a0f3ea357977..8e654da55170 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -158,8 +158,9 @@ struct svc_rdma_send_ctxt { struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; void *sc_xprt_buf; + int sc_page_count; int sc_cur_sge_no; - + struct page *sc_pages[RPCSVC_MAXPAGES]; struct ib_sge sc_sges[]; }; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 8e7ccef74207..4c62bc41ea40 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -213,6 +213,7 @@ out: ctxt->sc_send_wr.num_sge = 0; ctxt->sc_cur_sge_no = 0; + ctxt->sc_page_count = 0; return ctxt; out_empty: @@ -227,6 +228,8 @@ out_empty: * svc_rdma_send_ctxt_put - Return send_ctxt to free list * @rdma: controlling svcxprt_rdma * @ctxt: object to return to the free list + * + * Pages left in sc_pages are DMA unmapped and released. */ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) @@ -234,6 +237,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; + for (i = 0; i < ctxt->sc_page_count; ++i) + put_page(ctxt->sc_pages[i]); + /* The first SGE contains the transport header, which * remains mapped until @ctxt is destroyed. */ @@ -798,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, svc_rdma_xb_dma_map, &args); } +/* The svc_rqst and all resources it owns are released as soon as + * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt + * so they are released by the Send completion handler. + */ +static inline void svc_rdma_save_io_pages(struct svc_rqst *rqstp, + struct svc_rdma_send_ctxt *ctxt) +{ + int i, pages = rqstp->rq_next_page - rqstp->rq_respages; + + ctxt->sc_page_count += pages; + for (i = 0; i < pages; i++) { + ctxt->sc_pages[i] = rqstp->rq_respages[i]; + rqstp->rq_respages[i] = NULL; + } + + /* Prevent svc_xprt_release from releasing pages in rq_pages */ + rqstp->rq_next_page = rqstp->rq_respages; +} + /* Prepare the portion of the RPC Reply that will be transmitted * via RDMA Send. The RPC-over-RDMA transport header is prepared * in sc_sges[0], and the RPC xdr_buf is prepared in following sges. -- cgit v1.2.3 From c4b50cdf9d9d7962d58ece5efba865f56ec40398 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:10:14 -0400 Subject: svcrdma: Revert 2a1e4f21d841 ("svcrdma: Normalize Send page handling") Get rid of the completion wait in svc_rdma_sendto(), and release pages in the send completion handler again. A subsequent patch will handle releasing those pages more efficiently. Reverted by hand: patch -R would not apply cleanly. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 8 +------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 27 ++++++++++++--------------- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 8e654da55170..a5ee0af2a310 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -154,7 +154,6 @@ struct svc_rdma_send_ctxt { struct ib_send_wr sc_send_wr; struct ib_cqe sc_cqe; - struct completion sc_done; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; void *sc_xprt_buf; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index aa2227a7e552..7420a2c990c7 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, */ get_page(virt_to_page(rqst->rq_buffer)); sctxt->sc_send_wr.opcode = IB_WR_SEND; - ret = svc_rdma_send(rdma, sctxt); - if (ret < 0) - return ret; - - ret = wait_for_completion_killable(&sctxt->sc_done); - svc_rdma_send_ctxt_put(rdma, sctxt); - return ret; + return svc_rdma_send(rdma, sctxt); } /* Server-side transport endpoint wants a whole page for its send diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 4c62bc41ea40..1ae4236d04a3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -147,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; - init_completion(&ctxt->sc_done); ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, @@ -286,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); svc_rdma_wake_send_waiters(rdma, 1); - complete(&ctxt->sc_done); if (unlikely(wc->status != IB_WC_SUCCESS)) goto flushed; trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + svc_rdma_send_ctxt_put(rdma, ctxt); return; flushed: @@ -299,6 +298,7 @@ flushed: trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid); else trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid); + svc_rdma_send_ctxt_put(rdma, ctxt); svc_xprt_deferred_close(&rdma->sc_xprt); } @@ -315,8 +315,6 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) struct ib_send_wr *wr = &ctxt->sc_send_wr; int ret; - reinit_completion(&ctxt->sc_done); - /* Sync the transport header buffer */ ib_dma_sync_single_for_device(rdma->sc_pd->device, wr->sg_list[0].addr, @@ -808,8 +806,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt * so they are released by the Send completion handler. */ -static inline void svc_rdma_save_io_pages(struct svc_rqst *rqstp, - struct svc_rdma_send_ctxt *ctxt) +static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, + struct svc_rdma_send_ctxt *ctxt) { int i, pages = rqstp->rq_next_page - rqstp->rq_respages; @@ -852,6 +850,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, if (ret < 0) return ret; + svc_rdma_save_io_pages(rqstp, sctxt); + if (rctxt->rc_inv_rkey) { sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; @@ -859,13 +859,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, sctxt->sc_send_wr.opcode = IB_WR_SEND; } - ret = svc_rdma_send(rdma, sctxt); - if (ret < 0) - return ret; - - ret = wait_for_completion_killable(&sctxt->sc_done); - svc_rdma_send_ctxt_put(rdma, sctxt); - return ret; + return svc_rdma_send(rdma, sctxt); } /** @@ -931,8 +925,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; if (svc_rdma_send(rdma, sctxt)) goto put_ctxt; - - wait_for_completion_killable(&sctxt->sc_done); + return; put_ctxt: svc_rdma_send_ctxt_put(rdma, sctxt); @@ -1006,6 +999,10 @@ reply_chunk: if (ret != -E2BIG && ret != -EINVAL) goto put_ctxt; + /* Send completion releases payload pages that were part + * of previously posted RDMA Writes. + */ + svc_rdma_save_io_pages(rqstp, sctxt); svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret); return 0; -- cgit v1.2.3 From baf6d18b116b7dc84ed5e212c3a89f17cdc3f28c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:10:20 -0400 Subject: svcrdma: Prevent page release when nothing was received I noticed that svc_rqst_release_pages() was still unnecessarily releasing a page when svc_rdma_recvfrom() returns zero. Fixes: a53d5cb0646a ("svcrdma: Avoid releasing a page in svc_xprt_release()") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 46a719ba4917..16b673a7d280 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -792,6 +792,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_recv_ctxt *ctxt; int ret; + /* Prevent svc_xprt_release() from releasing pages in rq_pages + * when returning 0 or an error. + */ + rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_next_page = rqstp->rq_respages; + rqstp->rq_xprt_ctxt = NULL; ctxt = NULL; @@ -815,12 +821,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) DMA_FROM_DEVICE); svc_rdma_build_arg_xdr(rqstp, ctxt); - /* Prevent svc_xprt_release from releasing pages in rq_pages - * if we return 0 or an error. - */ - rqstp->rq_respages = rqstp->rq_pages; - rqstp->rq_next_page = rqstp->rq_respages; - ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); if (ret < 0) goto out_err; -- cgit v1.2.3 From 5581cf8efc3863e3831a3ee50854e823ec618df8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:10:27 -0400 Subject: SUNRPC: Optimize page release in svc_rdma_sendto() Now that we have bulk page allocation and release APIs, it's more efficient to use those than it is for nfsd threads to wait for send completions. Previous patches have eliminated the calls to wait_for_completion() and complete(), in order to avoid scheduler overhead. Now release pages-under-I/O in the send completion handler using the efficient bulk release API. I've measured a 7% reduction in cumulative CPU utilization in svc_rdma_sendto(), svc_rdma_wc_send(), and svc_xprt_release(). In particular, using release_pages() instead of complete() cuts the time per svc_rdma_wc_send() call by two-thirds. This helps improve scalability because svc_rdma_wc_send() is single-threaded per connection. Reviewed-by: Tom Talpey Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 1ae4236d04a3..24228f3611e8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -236,8 +236,8 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; - for (i = 0; i < ctxt->sc_page_count; ++i) - put_page(ctxt->sc_pages[i]); + if (ctxt->sc_page_count) + release_pages(ctxt->sc_pages, ctxt->sc_page_count); /* The first SGE contains the transport header, which * remains mapped until @ctxt is destroyed. -- cgit v1.2.3 From f8335a212ac1495da3e9d98ff46c4bcd6a359011 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:33 -0400 Subject: SUNRPC: Move initialization of rq_stime Micro-optimization: Call ktime_get() only when ->xpo_recvfrom() has given us a full RPC message to process. rq_stime isn't used otherwise, so this avoids pointless work. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9ca3393a197e..79b88889f073 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -849,7 +849,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) len = svc_deferred_recv(rqstp); else len = xprt->xpt_ops->xpo_recvfrom(rqstp); - rqstp->rq_stime = ktime_get(); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } else @@ -892,6 +891,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) err = -EAGAIN; if (len <= 0) goto out_release; + trace_svc_xdr_recvfrom(&rqstp->rq_arg); clear_bit(XPT_OLD, &xprt->xpt_flags); @@ -900,6 +900,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; + rqstp->rq_stime = ktime_get(); return len; out_release: rqstp->rq_res.len = 0; -- cgit v1.2.3 From 262176798b18b12fd8ab84c94cfece0a6a652476 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:39 -0400 Subject: NFSD: Add an nfsd4_encode_nfstime4() helper Clean up: de-duplicate some common code. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a615fe665381..26b1343c8035 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, return p; } +static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, + struct timespec64 *tv) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 3); + if (!p) + return nfserr_resource; + + p = xdr_encode_hyper(p, (s64)tv->tv_sec); + *p = cpu_to_be32(tv->tv_nsec); + return nfs_ok; +} + /* * ctime (in NFSv4, time_metadata) is not writeable, and the client * doesn't really care what resolution could theoretically be stored by @@ -3352,11 +3366,9 @@ out_acl: p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); - *p++ = cpu_to_be32(stat.atime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.atime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); @@ -3365,25 +3377,19 @@ out_acl: p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); - *p++ = cpu_to_be32(stat.ctime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.ctime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); - *p++ = cpu_to_be32(stat.mtime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.mtime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); - *p++ = cpu_to_be32(stat.btime.tv_nsec); + status = nfsd4_encode_nfstime4(xdr, &stat.btime); + if (status) + goto out; } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { u64 ino = stat.ino; -- cgit v1.2.3 From 91f8ce28466e480fd9ef27e028ac89dbfd264c24 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:45 -0400 Subject: svcrdma: Convert "might sleep" comment into a code annotation Try to catch incorrect calling contexts mechanically rather than by code review. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 5 +++-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 068c365e7812..59ea87b5f458 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -353,8 +353,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) return; } -/* This function sleeps when the transport's Send Queue is congested. - * +/* * Assumptions: * - If ib_post_send() succeeds, only one completion is expected, * even if one or more WRs are flushed. This is true when posting @@ -369,6 +368,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) struct ib_cqe *cqe; int ret; + might_sleep(); + if (cc->cc_sqecount > rdma->sc_sq_depth) return -EINVAL; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 24228f3611e8..c6644cca52c5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -315,6 +315,8 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) struct ib_send_wr *wr = &ctxt->sc_send_wr; int ret; + might_sleep(); + /* Sync the transport header buffer */ ib_dma_sync_single_for_device(rdma->sc_pd->device, wr->sg_list[0].addr, -- cgit v1.2.3 From a23c76e92d8215456ca2fbd5cf6c1ff71b744c1d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:51 -0400 Subject: svcrdma: trace cc_release calls This event brackets the svcrdma_post_* trace points. If this trace event is enabled but does not appear as expected, that indicates a chunk_ctxt leak. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 8 ++++++++ net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 8f461e04e5f0..f8069ef2ee0f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2112,6 +2112,14 @@ DEFINE_POST_CHUNK_EVENT(read); DEFINE_POST_CHUNK_EVENT(write); DEFINE_POST_CHUNK_EVENT(reply); +DEFINE_EVENT(svcrdma_post_chunk_class, svcrdma_cc_release, + TP_PROTO( + const struct rpc_rdma_cid *cid, + int sqecount + ), + TP_ARGS(cid, sqecount) +); + TRACE_EVENT(svcrdma_wc_read, TP_PROTO( const struct ib_wc *wc, diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 59ea87b5f458..9836406cc41e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -191,6 +191,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, struct svc_rdma_rw_ctxt *ctxt; LLIST_HEAD(free); + trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount); + first = last = NULL; while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) { list_del(&ctxt->rw_list); -- cgit v1.2.3 From b55c63332e9a33833d3daedb83bc1fbbcfc00e1c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:13:58 -0400 Subject: svcrdma: Remove an unused argument from __svc_rdma_put_rw_ctxt() Clean up. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 9836406cc41e..e460e25a1d6d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -84,8 +84,7 @@ out_noctx: return NULL; } -static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, - struct svc_rdma_rw_ctxt *ctxt, +static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt, struct llist_head *list) { sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE); @@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, struct svc_rdma_rw_ctxt *ctxt) { - __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts); + __svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts); } /** @@ -200,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num, ctxt->rw_sg_table.sgl, ctxt->rw_nents, dir); - __svc_rdma_put_rw_ctxt(rdma, ctxt, &free); + __svc_rdma_put_rw_ctxt(ctxt, &free); ctxt->rw_node.next = first; first = &ctxt->rw_node; -- cgit v1.2.3 From 02cea33f56241819f72edbd8f470a45e720d9ca3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:14:04 -0400 Subject: SUNRPC: Fix comments for transport class registration The preceding block comment before svc_register_xprt_class() is not related to that function. While we're here, add proper documenting comments for these two publicly-visible functions. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 79b88889f073..5a3512bdfdce 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -74,6 +74,13 @@ static LIST_HEAD(svc_xprt_class_list); * that no other thread will be using the transport or will * try to set XPT_DEAD. */ + +/** + * svc_reg_xprt_class - Register a server-side RPC transport class + * @xcl: New transport class to be registered + * + * Returns zero on success; otherwise a negative errno is returned. + */ int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; @@ -96,6 +103,11 @@ out: } EXPORT_SYMBOL_GPL(svc_reg_xprt_class); +/** + * svc_unreg_xprt_class - Unregister a server-side RPC transport class + * @xcl: Transport class to be unregistered + * + */ void svc_unreg_xprt_class(struct svc_xprt_class *xcl) { dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name); -- cgit v1.2.3 From 6c53da5d66b1f0d30b1039e14eed943efe377805 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 12 Jun 2023 10:14:10 -0400 Subject: SUNRPC: Remove transport class dprintk call sites Remove a couple of dprintk call sites that are of little value. Reviewed-by: Jeff Layton Acked-by: Tom Talpey Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5a3512bdfdce..62c7919ea610 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -86,8 +86,6 @@ int svc_reg_xprt_class(struct svc_xprt_class *xcl) struct svc_xprt_class *cl; int res = -EEXIST; - dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name); - INIT_LIST_HEAD(&xcl->xcl_list); spin_lock(&svc_xprt_class_lock); /* Make sure there isn't already a class with the same name */ @@ -110,7 +108,6 @@ EXPORT_SYMBOL_GPL(svc_reg_xprt_class); */ void svc_unreg_xprt_class(struct svc_xprt_class *xcl) { - dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name); spin_lock(&svc_xprt_class_lock); list_del_init(&xcl->xcl_list); spin_unlock(&svc_xprt_class_lock); -- cgit v1.2.3 From a9156d7e7d6a811a0ac1a2bc9ab6006bb928e871 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Wed, 14 Jun 2023 13:37:57 +0000 Subject: SUNRPC: Use sysfs_emit in place of strlcpy/sprintf Part of an effort to remove strlcpy() tree-wide [1]. Direct replacement is safe here since the getter in kernel_params_ops handles -errno return [2]. [1] https://github.com/KSPP/linux/issues/89 [2] https://elixir.bootlin.com/linux/v6.4-rc6/source/include/linux/moduleparam.h#L52 Signed-off-by: Azeem Shaikh Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e6d4cec61e47..b011c318fef1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -109,15 +109,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) switch (*ip) { case SVC_POOL_AUTO: - return strlcpy(buf, "auto\n", 20); + return sysfs_emit(buf, "auto\n"); case SVC_POOL_GLOBAL: - return strlcpy(buf, "global\n", 20); + return sysfs_emit(buf, "global\n"); case SVC_POOL_PERCPU: - return strlcpy(buf, "percpu\n", 20); + return sysfs_emit(buf, "percpu\n"); case SVC_POOL_PERNODE: - return strlcpy(buf, "pernode\n", 20); + return sysfs_emit(buf, "pernode\n"); default: - return sprintf(buf, "%d\n", *ip); + return sysfs_emit(buf, "%d\n", *ip); } } -- cgit v1.2.3 From 00a87e5d1d67ada9fc2d3a1f6407ae339b425bce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 16 Jun 2023 09:19:45 -0400 Subject: SUNRPC: Address RCU warning in net/sunrpc/svc.c $ make C=1 W=1 net/sunrpc/svc.o make[1]: Entering directory 'linux/obj/manet.1015granger.net' GEN Makefile CALL linux/server-development/scripts/checksyscalls.sh DESCEND objtool INSTALL libsubcmd_headers DESCEND bpf/resolve_btfids INSTALL libsubcmd_headers CC [M] net/sunrpc/svc.o CHECK linux/server-development/net/sunrpc/svc.c linux/server-development/net/sunrpc/svc.c:1225:9: warning: incorrect type in argument 1 (different address spaces) linux/server-development/net/sunrpc/svc.c:1225:9: expected struct spinlock [usertype] *lock linux/server-development/net/sunrpc/svc.c:1225:9: got struct spinlock [noderef] __rcu * linux/server-development/net/sunrpc/svc.c:1227:40: warning: incorrect type in argument 1 (different address spaces) linux/server-development/net/sunrpc/svc.c:1227:40: expected struct spinlock [usertype] *lock linux/server-development/net/sunrpc/svc.c:1227:40: got struct spinlock [noderef] __rcu * make[1]: Leaving directory 'linux/obj/manet.1015granger.net' Warning introduced by commit 913292c97d75 ("sched.h: Annotate sighand_struct with __rcu"). Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b011c318fef1..e7c101290425 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1164,6 +1164,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi */ static void svc_unregister(const struct svc_serv *serv, struct net *net) { + struct sighand_struct *sighand; struct svc_program *progp; unsigned long flags; unsigned int i; @@ -1180,9 +1181,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) } } - spin_lock_irqsave(¤t->sighand->siglock, flags); + rcu_read_lock(); + sighand = rcu_dereference(current->sighand); + spin_lock_irqsave(&sighand->siglock, flags); recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + spin_unlock_irqrestore(&sighand->siglock, flags); + rcu_read_unlock(); } /* -- cgit v1.2.3 From ed9ab7346e908496816cffdecd46932035f66e2e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jun 2023 17:51:34 -0400 Subject: nfsd: move init of percpu reply_cache_stats counters back to nfsd_init_net Commit f5f9d4a314da ("nfsd: move reply cache initialization into nfsd startup") moved the initialization of the reply cache into nfsd startup, but didn't account for the stats counters, which can be accessed before nfsd is ever started. The result can be a NULL pointer dereference when someone accesses /proc/fs/nfsd/reply_cache_stats while nfsd is still shut down. This is a regression and a user-triggerable oops in the right situation: - non-x86_64 arch - /proc/fs/nfsd is mounted in the namespace - nfsd is not started in the namespace - unprivileged user calls "cat /proc/fs/nfsd/reply_cache_stats" Although this is easy to trigger on some arches (like aarch64), on x86_64, calling this_cpu_ptr(NULL) evidently returns a pointer to the fixed_percpu_data. That struct looks just enough like a newly initialized percpu var to allow nfsd_reply_cache_stats_show to access it without Oopsing. Move the initialization of the per-net+per-cpu reply-cache counters back into nfsd_init_net, while leaving the rest of the reply cache allocations to be done at nfsd startup time. Kudos to Eirik who did most of the legwork to track this down. Cc: stable@vger.kernel.org # v6.3+ Fixes: f5f9d4a314da ("nfsd: move reply cache initialization into nfsd startup") Reported-and-tested-by: Eirik Fuller Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2215429 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/cache.h | 2 ++ fs/nfsd/nfscache.c | 25 ++++++++++++++----------- fs/nfsd/nfsctl.c | 10 +++++++++- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index f21259ead64b..4c9b87850ab1 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -80,6 +80,8 @@ enum { int nfsd_drc_slab_create(void); void nfsd_drc_slab_free(void); +int nfsd_net_reply_cache_init(struct nfsd_net *nn); +void nfsd_net_reply_cache_destroy(struct nfsd_net *nn); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 041faa13b852..a8eda1c85829 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -148,12 +148,23 @@ void nfsd_drc_slab_free(void) kmem_cache_destroy(drc_slab); } -static int nfsd_reply_cache_stats_init(struct nfsd_net *nn) +/** + * nfsd_net_reply_cache_init - per net namespace reply cache set-up + * @nn: nfsd_net being initialized + * + * Returns zero on succes; otherwise a negative errno is returned. + */ +int nfsd_net_reply_cache_init(struct nfsd_net *nn) { return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM); } -static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn) +/** + * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down + * @nn: nfsd_net being freed + * + */ +void nfsd_net_reply_cache_destroy(struct nfsd_net *nn) { nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM); } @@ -169,17 +180,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) hashsize = nfsd_hashsize(nn->max_drc_entries); nn->maskbits = ilog2(hashsize); - status = nfsd_reply_cache_stats_init(nn); - if (status) - goto out_nomem; - nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; nn->nfsd_reply_cache_shrinker.seeks = 1; status = register_shrinker(&nn->nfsd_reply_cache_shrinker, "nfsd-reply:%s", nn->nfsd_name); if (status) - goto out_stats_destroy; + return status; nn->drc_hashtbl = kvzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl)), GFP_KERNEL); @@ -195,9 +202,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) return 0; out_shrinker: unregister_shrinker(&nn->nfsd_reply_cache_shrinker); -out_stats_destroy: - nfsd_reply_cache_stats_destroy(nn); -out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); return -ENOMEM; } @@ -217,7 +221,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) rp, nn); } } - nfsd_reply_cache_stats_destroy(nn); kvfree(nn->drc_hashtbl); nn->drc_hashtbl = NULL; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1489e0b703b4..1a892a0b35de 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1505,6 +1505,9 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; + retval = nfsd_net_reply_cache_init(nn); + if (retval) + goto out_repcache_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; nfsd4_init_leases_net(nn); @@ -1513,6 +1516,8 @@ static __net_init int nfsd_init_net(struct net *net) return 0; +out_repcache_error: + nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: @@ -1521,9 +1526,12 @@ out_export_error: static __net_exit void nfsd_exit_net(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfsd_net_reply_cache_destroy(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); - nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); + nfsd_netns_free_versions(nn); } static struct pernet_operations nfsd_net_ops = { -- cgit v1.2.3 From 5e092be7418fdf0e1e288529bd7e657cb9d7954c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 16 Jun 2023 23:10:53 -0400 Subject: NFSD: Distinguish per-net namespace initialization I find the naming of nfsd_init_net() and nfsd_startup_net() to be confusingly similar. Rename the namespace initialization and tear- down ops and add comments to distinguish their separate purposes. Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 23 +++++++++++++++++++---- fs/nfsd/nfssvc.c | 5 +++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1a892a0b35de..1b8b1aab9a15 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1494,7 +1494,17 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; -static __net_init int nfsd_init_net(struct net *net) +/** + * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace + * @net: a freshly-created network namespace + * + * This information stays around as long as the network namespace is + * alive whether or not there is an NFSD instance running in the + * namespace. + * + * Returns zero on success, or a negative errno otherwise. + */ +static __net_init int nfsd_net_init(struct net *net) { int retval; struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -1524,7 +1534,12 @@ out_export_error: return retval; } -static __net_exit void nfsd_exit_net(struct net *net) +/** + * nfsd_net_exit - Release the nfsd_net portion of a net namespace + * @net: a network namespace that is about to be destroyed + * + */ +static __net_exit void nfsd_net_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -1535,8 +1550,8 @@ static __net_exit void nfsd_exit_net(struct net *net) } static struct pernet_operations nfsd_net_ops = { - .init = nfsd_init_net, - .exit = nfsd_exit_net, + .init = nfsd_net_init, + .exit = nfsd_net_exit, .id = &nfsd_net_id, .size = sizeof(struct nfsd_net), }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9c7b1ef5be40..2154fa63c5f2 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -402,6 +402,11 @@ void nfsd_reset_write_verifier(struct nfsd_net *nn) write_sequnlock(&nn->writeverf_lock); } +/* + * Crank up a set of per-namespace resources for a new NFSD instance, + * including lockd, a duplicate reply cache, an open file cache + * instance, and a cache of NFSv4 state objects. + */ static int nfsd_startup_net(struct net *net, const struct cred *cred) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); -- cgit v1.2.3 From 88770b8de38eeaf093c877ed78a7e6e1660df8df Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 18 Jun 2023 12:04:01 -0400 Subject: svcrdma: Fix stale comment Commit 7d81ee8722d6 ("svcrdma: Single-stage RDMA Read") changed the behavior of svc_rdma_recvfrom() but neglected to update the documenting comment. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 16b673a7d280..85c8bcaebb80 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -771,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, * * The next ctxt is removed from the "receive" lists. * - * - If the ctxt completes a Read, then finish assembling the Call - * message and return the number of bytes in the message. - * * - If the ctxt completes a Receive, then construct the Call * message from the contents of the Receive buffer. * @@ -782,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, * in the message. * * - If there are Read chunks in this message, post Read WRs to - * pull that payload and return 0. + * pull that payload. When the Read WRs complete, build the + * full message and return the number of bytes in it. */ int svc_rdma_recvfrom(struct svc_rqst *rqstp) { -- cgit v1.2.3 From 75bfb70457a4c4c9f0095e39885382fc5049c5ce Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jun 2023 15:52:05 +0100 Subject: nfsd: remove redundant assignments to variable len There are a few assignments to variable len where the value is not being read and so the assignments are redundant and can be removed. In one case, the variable len can be removed completely. Cleans up 4 clang scan warnings of the form: fs/nfsd/export.c:100:7: warning: Although the value stored to 'len' is used in the enclosing expression, the value is never actually read from 'len' [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ae85257b4238..11a0eaa2f914 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -97,7 +97,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) goto out; err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out; err = -ENOENT; @@ -107,7 +107,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) dprintk("found domain %s\n", buf); err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out; fsidtype = simple_strtoul(buf, &ep, 10); if (*ep) @@ -593,7 +593,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ char *buf; - int len; int err; struct auth_domain *dom = NULL; struct svc_export exp = {}, *expp; @@ -609,8 +608,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) /* client */ err = -EINVAL; - len = qword_get(&mesg, buf, PAGE_SIZE); - if (len <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out; err = -ENOENT; @@ -620,7 +618,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) /* path */ err = -EINVAL; - if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + if (qword_get(&mesg, buf, PAGE_SIZE) <= 0) goto out1; err = kern_path(buf, 0, &exp.ex_path); @@ -665,7 +663,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) goto out3; exp.ex_fsid = an_int; - while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { + while (qword_get(&mesg, buf, PAGE_SIZE) > 0) { if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); else if (strcmp(buf, "uuid") == 0) -- cgit v1.2.3