aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Burkov <boris@bur.io>2023-03-28 13:55:25 -0700
committerDavid Sterba <dsterba@suse.com>2023-10-12 16:44:11 +0200
commit5343cd9364ea26c9f4f78896a87ed1b5b5e652d9 (patch)
tree9a4f7d24df5ca55a292be21e234a1da22d0af2ec
parentcecbb533b5fcec4ff77e786b7f94457f6cacd9e7 (diff)
btrfs: qgroup: simple quota auto hierarchy for nested subvolumes
Consider the following sequence: - enable quotas - create subvol S id 256 at dir outer/ - create a qgroup 1/100 - add 0/256 (S's auto qgroup) to 1/100 - create subvol T id 257 at dir outer/inner/ With full qgroups, there is no relationship between 0/257 and either of 0/256 or 1/100. There is an inherit feature that the creator of inner/ can use to specify it ought to be in 1/100. Simple quotas are targeted at container isolation, where such automatic inheritance for not necessarily trusted/controlled nested subvol creation would be quite helpful. Therefore, add a new default behavior for simple quotas: when you create a nested subvol, automatically inherit as parents any parents of the qgroup of the subvol the new inode is going in. In our example, 257/0 would also be under 1/100, allowing easy control of a total quota over an arbitrary hierarchy of subvolumes. I think this _might_ be a generally useful behavior, so it could be interesting to put it behind a new inheritance flag that simple quotas always use while traditional quotas let the user specify, but this is a minimally intrusive change to start. Signed-off-by: Boris Burkov <boris@bur.io> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/qgroup.c57
-rw-r--r--fs/btrfs/qgroup.h6
-rw-r--r--fs/btrfs/transaction.c14
4 files changed, 67 insertions, 12 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 58189d99a22c..6603b6fff93f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -652,7 +652,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
/* Tree log can't currently deal with an inode which is a new root. */
btrfs_set_log_full_commit(trans);
- ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
+ ret = btrfs_qgroup_inherit(trans, 0, objectid, root->root_key.objectid, inherit);
if (ret)
goto out;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4c70417f13b0..5b3ad2f15039 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1549,8 +1549,7 @@ out:
return ret;
}
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
- u64 dst)
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_qgroup *parent;
@@ -3018,6 +3017,47 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
return ret;
}
+static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info,
+ u64 inode_rootid,
+ struct btrfs_qgroup_inherit **inherit)
+{
+ int i = 0;
+ u64 num_qgroups = 0;
+ struct btrfs_qgroup *inode_qg;
+ struct btrfs_qgroup_list *qg_list;
+ struct btrfs_qgroup_inherit *res;
+ size_t struct_sz;
+ u64 *qgids;
+
+ if (*inherit)
+ return -EEXIST;
+
+ inode_qg = find_qgroup_rb(fs_info, inode_rootid);
+ if (!inode_qg)
+ return -ENOENT;
+
+ num_qgroups = list_count_nodes(&inode_qg->groups);
+
+ if (!num_qgroups)
+ return 0;
+
+ struct_sz = struct_size(res, qgroups, num_qgroups);
+ if (struct_sz == SIZE_MAX)
+ return -ERANGE;
+
+ res = kzalloc(struct_sz, GFP_NOFS);
+ if (!res)
+ return -ENOMEM;
+ res->num_qgroups = num_qgroups;
+ qgids = res->qgroups;
+
+ list_for_each_entry(qg_list, &inode_qg->groups, next_group)
+ qgids[i] = qg_list->group->qgroupid;
+
+ *inherit = res;
+ return 0;
+}
+
/*
* Copy the accounting information between qgroups. This is necessary
* when a snapshot or a subvolume is created. Throwing an error will
@@ -3025,7 +3065,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
* when a readonly fs is a reasonable outcome.
*/
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
- u64 objectid, struct btrfs_qgroup_inherit *inherit)
+ u64 objectid, u64 inode_rootid,
+ struct btrfs_qgroup_inherit *inherit)
{
int ret = 0;
int i;
@@ -3037,6 +3078,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
struct btrfs_qgroup *dstgroup;
struct btrfs_qgroup *prealloc;
struct btrfs_qgroup_list **qlist_prealloc = NULL;
+ bool free_inherit = false;
bool need_rescan = false;
u32 level_size = 0;
u64 nums;
@@ -3073,6 +3115,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
goto out;
}
+ if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE && !inherit) {
+ ret = qgroup_auto_inherit(fs_info, inode_rootid, &inherit);
+ if (ret)
+ goto out;
+ free_inherit = true;
+ }
+
if (inherit) {
i_qgroups = (u64 *)(inherit + 1);
nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
@@ -3256,6 +3305,8 @@ out:
kfree(qlist_prealloc[i]);
kfree(qlist_prealloc);
}
+ if (free_inherit)
+ kfree(inherit);
kfree(prealloc);
return ret;
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 1e0eb04ef96c..9858244eef20 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -312,8 +312,7 @@ int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
bool interruptible);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
- u64 dst);
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
@@ -343,7 +342,8 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
- u64 objectid, struct btrfs_qgroup_inherit *inherit);
+ u64 objectid, u64 inode_rootid,
+ struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes,
enum btrfs_qgroup_rsv_type type);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6c24d00c3612..b87b00884e47 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1620,8 +1620,8 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
int ret;
/*
- * Save some performance in the case that full qgroups are not enabled.
- * If this check races with the ioctl, rescan will kick in anyway.
+ * Save some performance in the case that qgroups are not enabled. If
+ * this check races with the ioctl, rescan will kick in anyway.
*/
if (!btrfs_qgroup_full_accounting(fs_info))
return 0;
@@ -1662,7 +1662,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
/* Now qgroup are all updated, we can inherit it to new qgroups */
ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
- inherit);
+ parent->root_key.objectid, inherit);
if (ret < 0)
goto out;
@@ -1929,8 +1929,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* To co-operate with that hack, we do hack again.
* Or snapshot will be greatly slowed down by a subtree qgroup rescan
*/
- ret = qgroup_account_snapshot(trans, root, parent_root,
- pending->inherit, objectid);
+ if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL)
+ ret = qgroup_account_snapshot(trans, root, parent_root,
+ pending->inherit, objectid);
+ else if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE)
+ ret = btrfs_qgroup_inherit(trans, root->root_key.objectid, objectid,
+ parent_root->root_key.objectid, pending->inherit);
if (ret < 0)
goto fail;