From f1919826896c82b6af9c46f69e02f2bc04df4be7 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Sun, 8 Apr 2018 09:54:39 +0800
Subject: ceph: check if mds create snaprealm when setting quota

If mds does not, return -EOPNOTSUPP.

Link: http://tracker.ceph.com/issues/23491
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/xattr.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 7e72348639e4..315f7e63e7cc 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -228,7 +228,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 
 static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
 {
-	return (ci->i_max_files || ci->i_max_bytes);
+	bool ret = false;
+	spin_lock(&ci->i_ceph_lock);
+	if ((ci->i_max_files || ci->i_max_bytes) &&
+	    ci->i_vino.snap == CEPH_NOSNAP &&
+	    ci->i_snap_realm &&
+	    ci->i_snap_realm->ino == ci->i_vino.ino)
+		ret = true;
+	spin_unlock(&ci->i_ceph_lock);
+	return ret;
 }
 
 static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
@@ -1008,14 +1016,19 @@ int __ceph_setxattr(struct inode *inode, const char *name,
 	char *newval = NULL;
 	struct ceph_inode_xattr *xattr = NULL;
 	int required_blob_size;
+	bool check_realm = false;
 	bool lock_snap_rwsem = false;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
 	vxattr = ceph_match_vxattr(inode, name);
-	if (vxattr && vxattr->readonly)
-		return -EOPNOTSUPP;
+	if (vxattr) {
+		if (vxattr->readonly)
+			return -EOPNOTSUPP;
+		if (value && !strncmp(vxattr->name, "ceph.quota", 10))
+			check_realm = true;
+	}
 
 	/* pass any unhandled ceph.* xattrs through to the MDS */
 	if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
@@ -1109,6 +1122,15 @@ do_sync_unlocked:
 		err = -EBUSY;
 	} else {
 		err = ceph_sync_setxattr(inode, name, value, size, flags);
+		if (err >= 0 && check_realm) {
+			/* check if snaprealm was created for quota inode */
+			spin_lock(&ci->i_ceph_lock);
+			if ((ci->i_max_files || ci->i_max_bytes) &&
+			    !(ci->i_snap_realm &&
+			      ci->i_snap_realm->ino == ci->i_vino.ino))
+				err = -EOPNOTSUPP;
+			spin_unlock(&ci->i_ceph_lock);
+		}
 	}
 out:
 	ceph_free_cap_flush(prealloc_cf);
-- 
cgit v1.2.3


From facb9f6eba3df4e8027301cc0e514dc582a1b366 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 23 Apr 2018 15:25:10 +0200
Subject: libceph: un-backoff on tick when we have a authenticated session

This means that if we do some backoff, then authenticate, and are
healthy for an extended period of time, a subsequent failure won't
leave us starting our hunting sequence with a large backoff.

Mirrors ceph.git commit d466bc6e66abba9b464b0b69687cf45c9dccf383.

Cc: stable@vger.kernel.org # 4.7+
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 net/ceph/mon_client.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index b3dac24412d3..02c441c12c38 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -209,6 +209,14 @@ static void reopen_session(struct ceph_mon_client *monc)
 	__open_session(monc);
 }
 
+static void un_backoff(struct ceph_mon_client *monc)
+{
+	monc->hunt_mult /= 2; /* reduce by 50% */
+	if (monc->hunt_mult < 1)
+		monc->hunt_mult = 1;
+	dout("%s hunt_mult now %d\n", __func__, monc->hunt_mult);
+}
+
 /*
  * Reschedule delayed work timer.
  */
@@ -963,6 +971,7 @@ static void delayed_work(struct work_struct *work)
 		if (!monc->hunting) {
 			ceph_con_keepalive(&monc->con);
 			__validate_auth(monc);
+			un_backoff(monc);
 		}
 
 		if (is_auth &&
@@ -1123,9 +1132,7 @@ static void finish_hunting(struct ceph_mon_client *monc)
 		dout("%s found mon%d\n", __func__, monc->cur_mon);
 		monc->hunting = false;
 		monc->had_a_connection = true;
-		monc->hunt_mult /= 2; /* reduce by 50% */
-		if (monc->hunt_mult < 1)
-			monc->hunt_mult = 1;
+		un_backoff(monc);
 	}
 }
 
-- 
cgit v1.2.3


From 7b4c443d139f1d2b5570da475f7a9cbcef86740c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 23 Apr 2018 15:25:10 +0200
Subject: libceph: reschedule a tick in finish_hunting()

If we go without an established session for a while, backoff delay will
climb to 30 seconds.  The keepalive timeout is also 30 seconds, so it's
pretty easily hit after a prolonged hunting for a monitor: we don't get
a chance to send out a keepalive in time, which means we never get back
a keepalive ack in time, cutting an established session and attempting
to connect to a different monitor every 30 seconds:

  [Sun Apr 1 23:37:05 2018] libceph: mon0 10.80.20.99:6789 session established
  [Sun Apr 1 23:37:36 2018] libceph: mon0 10.80.20.99:6789 session lost, hunting for new mon
  [Sun Apr 1 23:37:36 2018] libceph: mon2 10.80.20.103:6789 session established
  [Sun Apr 1 23:38:07 2018] libceph: mon2 10.80.20.103:6789 session lost, hunting for new mon
  [Sun Apr 1 23:38:07 2018] libceph: mon1 10.80.20.100:6789 session established
  [Sun Apr 1 23:38:37 2018] libceph: mon1 10.80.20.100:6789 session lost, hunting for new mon
  [Sun Apr 1 23:38:37 2018] libceph: mon2 10.80.20.103:6789 session established
  [Sun Apr 1 23:39:08 2018] libceph: mon2 10.80.20.103:6789 session lost, hunting for new mon

The regular keepalive interval is 10 seconds.  After ->hunting is
cleared in finish_hunting(), call __schedule_delayed() to ensure we
send out a keepalive after 10 seconds.

Cc: stable@vger.kernel.org # 4.7+
Link: http://tracker.ceph.com/issues/23537
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 net/ceph/mon_client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 02c441c12c38..21ac6e3b96bb 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1133,6 +1133,7 @@ static void finish_hunting(struct ceph_mon_client *monc)
 		monc->hunting = false;
 		monc->had_a_connection = true;
 		un_backoff(monc);
+		__schedule_delayed(monc);
 	}
 }
 
-- 
cgit v1.2.3


From 9c55ad1c214d9f8c4594ac2c3fa392c1c32431a7 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 24 Apr 2018 19:10:55 +0200
Subject: libceph: validate con->state at the top of try_write()

ceph_con_workfn() validates con->state before calling try_read() and
then try_write().  However, try_read() temporarily releases con->mutex,
notably in process_message() and ceph_con_in_msg_alloc(), opening the
window for ceph_con_close() to sneak in, close the connection and
release con->sock.  When try_write() is called on the assumption that
con->state is still valid (i.e. not STANDBY or CLOSED), a NULL sock
gets passed to the networking stack:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
  IP: selinux_socket_sendmsg+0x5/0x20

Make sure con->state is valid at the top of try_write() and add an
explicit BUG_ON for this, similar to try_read().

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/23706
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 net/ceph/messenger.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index fcb40c12b1f8..3b3d33ea9ed8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2569,6 +2569,11 @@ static int try_write(struct ceph_connection *con)
 	int ret = 1;
 
 	dout("try_write start %p state %lu\n", con, con->state);
+	if (con->state != CON_STATE_PREOPEN &&
+	    con->state != CON_STATE_CONNECTING &&
+	    con->state != CON_STATE_NEGOTIATING &&
+	    con->state != CON_STATE_OPEN)
+		return 0;
 
 more:
 	dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
@@ -2594,6 +2599,8 @@ more:
 	}
 
 more_kvec:
+	BUG_ON(!con->sock);
+
 	/* kvec data queued? */
 	if (con->out_kvec_left) {
 		ret = write_partial_kvec(con);
-- 
cgit v1.2.3