diff options
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- | block/blk-cgroup.c | 229 |
1 files changed, 107 insertions, 122 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index cb110fc51940..c46778d1f3c2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -108,64 +108,41 @@ static struct cgroup_subsys_state *blkcg_css(void) return task_css(current, io_cgrp_id); } -static bool blkcg_policy_enabled(struct request_queue *q, +static bool blkcg_policy_enabled(struct gendisk *disk, const struct blkcg_policy *pol) { - return pol && test_bit(pol->plid, q->blkcg_pols); + return pol && test_bit(pol->plid, disk->blkcg_pols); } -static void blkg_free_workfn(struct work_struct *work) +static void blkg_free(struct blkcg_gq *blkg) { - struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, - free_work); - struct request_queue *q = blkg->q; int i; /* * pd_free_fn() can also be called from blkcg_deactivate_policy(), * in order to make sure pd_free_fn() is called in order, the deletion - * of the list blkg->q_node is delayed to here from blkg_destroy(), and + * of the list blkg->entry is delayed to here from blkg_destroy(), and * blkcg_mutex is used to synchronize blkg_free_workfn() and * blkcg_deactivate_policy(). */ - if (q) - mutex_lock(&q->blkcg_mutex); - + mutex_lock(&blkg->disk->blkcg_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); - if (blkg->parent) blkg_put(blkg->parent); + list_del_init(&blkg->entry); + mutex_unlock(&blkg->disk->blkcg_mutex); - if (q) { - list_del_init(&blkg->q_node); - mutex_unlock(&q->blkcg_mutex); - blk_put_queue(q); - } - + put_disk(blkg->disk); free_percpu(blkg->iostat_cpu); percpu_ref_exit(&blkg->refcnt); kfree(blkg); } -/** - * blkg_free - free a blkg - * @blkg: blkg to free - * - * Free @blkg which may be partially allocated. - */ -static void blkg_free(struct blkcg_gq *blkg) +static void blkg_free_workfn(struct work_struct *work) { - if (!blkg) - return; - - /* - * Both ->pd_free_fn() and request queue's release handler may - * sleep, so free us by scheduling one work func - */ - INIT_WORK(&blkg->free_work, blkg_free_workfn); - schedule_work(&blkg->free_work); + blkg_free(container_of(work, struct blkcg_gq, free_work)); } static void __blkg_release(struct rcu_head *rcu) @@ -176,7 +153,10 @@ static void __blkg_release(struct rcu_head *rcu) /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); - blkg_free(blkg); + + /* ->pd_free_fn() may sleep, so free from a work queue */ + INIT_WORK(&blkg->free_work, blkg_free_workfn); + schedule_work(&blkg->free_work); } /* @@ -265,19 +245,18 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, blkg = kzalloc_node(sizeof(*blkg), gfp_mask, disk->queue->node); if (!blkg) return NULL; - if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask)) - goto err_free; - + goto out_free_blkg; blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask); if (!blkg->iostat_cpu) - goto err_free; + goto out_exit_refcnt; - if (!blk_get_queue(disk->queue)) - goto err_free; + if (test_bit(GD_DEAD, &disk->state)) + goto out_free_iostat; + get_device(disk_to_dev(disk)); + blkg->disk = disk; - blkg->q = disk->queue; - INIT_LIST_HEAD(&blkg->q_node); + INIT_LIST_HEAD(&blkg->entry); spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); @@ -293,14 +272,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; - if (!blkcg_policy_enabled(disk->queue, pol)) + if (!blkcg_policy_enabled(disk, pol)) continue; /* alloc per-policy data and attach it to blkg */ - pd = pol->pd_alloc_fn(gfp_mask, disk->queue, blkcg); + pd = pol->pd_alloc_fn(disk, blkcg, gfp_mask); if (!pd) - goto err_free; - + goto out_free_pds; blkg->pd[i] = pd; pd->blkg = blkg; pd->plid = i; @@ -309,8 +287,17 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, return blkg; -err_free: - blkg_free(blkg); +out_free_pds: + while (--i >= 0) + if (blkg->pd[i]) + blkcg_policy[i]->pd_free_fn(blkg->pd[i]); + put_disk(blkg->disk); +out_free_iostat: + free_percpu(blkg->iostat_cpu); +out_exit_refcnt: + percpu_ref_exit(&blkg->refcnt); +out_free_blkg: + kfree(blkg); return NULL; } @@ -350,7 +337,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, /* link parent */ if (blkcg_parent(blkcg)) { - blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk->queue); + blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk); if (WARN_ON_ONCE(!blkg->parent)) { ret = -ENODEV; goto err_put_css; @@ -371,7 +358,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); - list_add(&blkg->q_node, &disk->queue->blkg_list); + list_add(&blkg->entry, &disk->blkg_list); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -396,7 +383,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, err_put_css: css_put(&blkcg->css); err_free_blkg: - blkg_free(new_blkg); + if (new_blkg) + blkg_free(new_blkg); return ERR_PTR(ret); } @@ -422,12 +410,12 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, WARN_ON_ONCE(!rcu_read_lock_held()); - blkg = blkg_lookup(blkcg, q); + blkg = blkg_lookup(blkcg, disk); if (blkg) return blkg; spin_lock_irqsave(&q->queue_lock, flags); - blkg = blkg_lookup(blkcg, q); + blkg = blkg_lookup(blkcg, disk); if (blkg) { if (blkcg != &blkcg_root && blkg != rcu_dereference(blkcg->blkg_hint)) @@ -443,10 +431,10 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - struct blkcg_gq *ret_blkg = q->root_blkg; + struct blkcg_gq *ret_blkg = disk->root_blkg; while (parent) { - blkg = blkg_lookup(parent, q); + blkg = blkg_lookup(parent, disk); if (blkg) { /* remember closest blkg */ ret_blkg = blkg; @@ -475,7 +463,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) struct blkcg *blkcg = blkg->blkcg; int i; - lockdep_assert_held(&blkg->q->queue_lock); + lockdep_assert_held(&blkg->disk->queue->queue_lock); lockdep_assert_held(&blkcg->lock); /* @@ -499,7 +487,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) blkg->online = false; - radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); + radix_tree_delete(&blkcg->blkg_tree, blkg->disk->queue->id); hlist_del_init_rcu(&blkg->blkcg_node); /* @@ -525,7 +513,7 @@ static void blkg_destroy_all(struct gendisk *disk) restart: spin_lock_irq(&q->queue_lock); - list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { + list_for_each_entry_safe(blkg, n, &disk->blkg_list, entry) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -544,7 +532,7 @@ restart: } } - q->root_blkg = NULL; + disk->root_blkg = NULL; spin_unlock_irq(&q->queue_lock); } @@ -586,9 +574,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, const char *blkg_dev_name(struct blkcg_gq *blkg) { - if (!blkg->q->disk || !blkg->q->disk->bdi->dev) - return NULL; - return bdi_dev_name(blkg->q->disk->bdi); + return bdi_dev_name(blkg->disk->bdi); } /** @@ -620,10 +606,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(&blkg->q->queue_lock); - if (blkcg_policy_enabled(blkg->q, pol)) + spin_lock_irq(&blkg->disk->queue->queue_lock); + if (blkcg_policy_enabled(blkg->disk, pol)) total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(&blkg->q->queue_lock); + spin_unlock_irq(&blkg->disk->queue->queue_lock); } rcu_read_unlock(); @@ -729,12 +715,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, rcu_read_lock(); spin_lock_irq(&q->queue_lock); - if (!blkcg_policy_enabled(q, pol)) { + if (!blkcg_policy_enabled(disk, pol)) { ret = -EOPNOTSUPP; goto fail_unlock; } - blkg = blkg_lookup(blkcg, q); + blkg = blkg_lookup(blkcg, disk); if (blkg) goto success; @@ -748,7 +734,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkcg_gq *new_blkg; parent = blkcg_parent(blkcg); - while (parent && !blkg_lookup(parent, q)) { + while (parent && !blkg_lookup(parent, disk)) { pos = parent; parent = blkcg_parent(parent); } @@ -772,13 +758,13 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, rcu_read_lock(); spin_lock_irq(&q->queue_lock); - if (!blkcg_policy_enabled(q, pol)) { + if (!blkcg_policy_enabled(disk, pol)) { blkg_free(new_blkg); ret = -EOPNOTSUPP; goto fail_preloaded; } - blkg = blkg_lookup(pos, q); + blkg = blkg_lookup(pos, disk); if (blkg) { blkg_free(new_blkg); } else { @@ -952,7 +938,7 @@ static void blkcg_fill_root_iostats(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct block_device *bdev = dev_to_bdev(dev); - struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg; + struct blkcg_gq *blkg = bdev->bd_disk->root_blkg; struct blkg_iostat tmp; int cpu; unsigned long flags; @@ -1047,9 +1033,9 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(&blkg->q->queue_lock); + spin_lock_irq(&blkg->disk->queue->queue_lock); blkcg_print_one_stat(blkg, sf); - spin_unlock_irq(&blkg->q->queue_lock); + spin_unlock_irq(&blkg->disk->queue->queue_lock); } rcu_read_unlock(); return 0; @@ -1119,7 +1105,7 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg) while (!hlist_empty(&blkcg->blkg_list)) { struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, struct blkcg_gq, blkcg_node); - struct request_queue *q = blkg->q; + struct request_queue *q = blkg->disk->queue; if (need_resched() || !spin_trylock(&q->queue_lock)) { /* @@ -1299,8 +1285,8 @@ int blkcg_init_disk(struct gendisk *disk) bool preloaded; int ret; - INIT_LIST_HEAD(&q->blkg_list); - mutex_init(&q->blkcg_mutex); + INIT_LIST_HEAD(&disk->blkg_list); + mutex_init(&disk->blkcg_mutex); new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL); if (!new_blkg) @@ -1314,7 +1300,7 @@ int blkcg_init_disk(struct gendisk *disk) blkg = blkg_create(&blkcg_root, disk, new_blkg); if (IS_ERR(blkg)) goto err_unlock; - q->root_blkg = blkg; + disk->root_blkg = blkg; spin_unlock_irq(&q->queue_lock); if (preloaded) @@ -1377,9 +1363,9 @@ static void blkcg_bind(struct cgroup_subsys_state *root_css) static void blkcg_exit(struct task_struct *tsk) { - if (tsk->throttle_queue) - blk_put_queue(tsk->throttle_queue); - tsk->throttle_queue = NULL; + if (tsk->throttle_disk) + put_disk(tsk->throttle_disk); + tsk->throttle_disk = NULL; } struct cgroup_subsys io_cgrp_subsys = { @@ -1405,14 +1391,14 @@ struct cgroup_subsys io_cgrp_subsys = { EXPORT_SYMBOL_GPL(io_cgrp_subsys); /** - * blkcg_activate_policy - activate a blkcg policy on a request_queue - * @q: request_queue of interest + * blkcg_activate_policy - activate a blkcg policy on a gendisk + * @disk: gendisk of interest * @pol: blkcg policy to activate * - * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through + * Activate @pol on @disk. Requires %GFP_KERNEL context. @disk goes through * bypass mode to populate its blkgs with policy_data for @pol. * - * Activation happens with @q bypassed, so nobody would be accessing blkgs + * Activation happens with @disk bypassed, so nobody would be accessing blkgs * from IO path. Update of each blkg is protected by both queue and blkcg * locks so that holding either lock and testing blkcg_policy_enabled() is * always enough for dereferencing policy data. @@ -1420,14 +1406,14 @@ EXPORT_SYMBOL_GPL(io_cgrp_subsys); * The caller is responsible for synchronizing [de]activations and policy * [un]registerations. Returns 0 on success, -errno on failure. */ -int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol) +int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { + struct request_queue *q = disk->queue; struct blkg_policy_data *pd_prealloc = NULL; struct blkcg_gq *blkg, *pinned_blkg = NULL; int ret; - if (blkcg_policy_enabled(q, pol)) + if (blkcg_policy_enabled(disk, pol)) return 0; if (queue_is_mq(q)) @@ -1436,7 +1422,7 @@ retry: spin_lock_irq(&q->queue_lock); /* blkg_list is pushed at the head, reverse walk to allocate parents first */ - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { + list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) { struct blkg_policy_data *pd; if (blkg->pd[pol->plid]) @@ -1447,8 +1433,8 @@ retry: pd = pd_prealloc; pd_prealloc = NULL; } else { - pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, - blkg->blkcg); + pd = pol->pd_alloc_fn(disk, blkg->blkcg, + GFP_NOWAIT | __GFP_NOWARN); } if (!pd) { @@ -1465,8 +1451,8 @@ retry: if (pd_prealloc) pol->pd_free_fn(pd_prealloc); - pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, - blkg->blkcg); + pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg, + GFP_KERNEL); if (pd_prealloc) goto retry; else @@ -1481,16 +1467,16 @@ retry: /* all allocated, init in the same order */ if (pol->pd_init_fn) - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) + list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) pol->pd_init_fn(blkg->pd[pol->plid]); - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { + list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) { if (pol->pd_online_fn) pol->pd_online_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid]->online = true; } - __set_bit(pol->plid, q->blkcg_pols); + __set_bit(pol->plid, disk->blkcg_pols); ret = 0; spin_unlock_irq(&q->queue_lock); @@ -1506,7 +1492,7 @@ out: enomem: /* alloc failed, nothing's initialized yet, free everything */ spin_lock_irq(&q->queue_lock); - list_for_each_entry(blkg, &q->blkg_list, q_node) { + list_for_each_entry(blkg, &disk->blkg_list, entry) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -1523,30 +1509,31 @@ enomem: EXPORT_SYMBOL_GPL(blkcg_activate_policy); /** - * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue - * @q: request_queue of interest + * blkcg_deactivate_policy - deactivate a blkcg policy on a gendisk + * @disk: gendisk of interest * @pol: blkcg policy to deactivate * - * Deactivate @pol on @q. Follows the same synchronization rules as + * Deactivate @pol on @disk. Follows the same synchronization rules as * blkcg_activate_policy(). */ -void blkcg_deactivate_policy(struct request_queue *q, +void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { + struct request_queue *q = disk->queue; struct blkcg_gq *blkg; - if (!blkcg_policy_enabled(q, pol)) + if (!blkcg_policy_enabled(disk, pol)) return; if (queue_is_mq(q)) blk_mq_freeze_queue(q); - mutex_lock(&q->blkcg_mutex); + mutex_lock(&disk->blkcg_mutex); spin_lock_irq(&q->queue_lock); - __clear_bit(pol->plid, q->blkcg_pols); + __clear_bit(pol->plid, disk->blkcg_pols); - list_for_each_entry(blkg, &q->blkg_list, q_node) { + list_for_each_entry(blkg, &disk->blkg_list, entry) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -1560,7 +1547,7 @@ void blkcg_deactivate_policy(struct request_queue *q, } spin_unlock_irq(&q->queue_lock); - mutex_unlock(&q->blkcg_mutex); + mutex_unlock(&disk->blkcg_mutex); if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); @@ -1830,29 +1817,29 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) * * This is only called if we've been marked with set_notify_resume(). Obviously * we can be set_notify_resume() for reasons other than blkcg throttling, so we - * check to see if current->throttle_queue is set and if not this doesn't do + * check to see if current->throttle_disk is set and if not this doesn't do * anything. This should only ever be called by the resume code, it's not meant * to be called by people willy-nilly as it will actually do the work to * throttle the task if it is setup for throttling. */ void blkcg_maybe_throttle_current(void) { - struct request_queue *q = current->throttle_queue; + struct gendisk *disk = current->throttle_disk; struct blkcg *blkcg; struct blkcg_gq *blkg; bool use_memdelay = current->use_memdelay; - if (!q) + if (!disk) return; - current->throttle_queue = NULL; + current->throttle_disk = NULL; current->use_memdelay = false; rcu_read_lock(); blkcg = css_to_blkcg(blkcg_css()); if (!blkcg) goto out; - blkg = blkg_lookup(blkcg, q); + blkg = blkg_lookup(blkcg, disk); if (!blkg) goto out; if (!blkg_tryget(blkg)) @@ -1861,11 +1848,10 @@ void blkcg_maybe_throttle_current(void) blkcg_maybe_throttle_blkg(blkg, use_memdelay); blkg_put(blkg); - blk_put_queue(q); + put_disk(disk); return; out: rcu_read_unlock(); - blk_put_queue(q); } /** @@ -1887,18 +1873,17 @@ out: */ void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay) { - struct request_queue *q = disk->queue; - if (unlikely(current->flags & PF_KTHREAD)) return; - if (current->throttle_queue != q) { - if (!blk_get_queue(q)) + if (current->throttle_disk != disk) { + if (test_bit(GD_DEAD, &disk->state)) return; + get_device(disk_to_dev(disk)); - if (current->throttle_queue) - blk_put_queue(current->throttle_queue); - current->throttle_queue = q; + if (current->throttle_disk) + put_disk(current->throttle_disk); + current->throttle_disk = disk; } if (use_memdelay) @@ -1959,7 +1944,7 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio, * Associate @bio with the blkg found by combining the css's blkg and the * request_queue of the @bio. An association failure is handled by walking up * the blkg tree. Therefore, the blkg associated can be anything between @blkg - * and q->root_blkg. This situation only happens when a cgroup is dying and + * and disk->root_blkg. This situation only happens when a cgroup is dying and * then the remaining bios will spill to the closest alive blkg. * * A reference will be taken on the blkg and will be released when @bio is @@ -1974,8 +1959,8 @@ void bio_associate_blkg_from_css(struct bio *bio, if (css && css->parent) { bio->bi_blkg = blkg_tryget_closest(bio, css); } else { - blkg_get(bdev_get_queue(bio->bi_bdev)->root_blkg); - bio->bi_blkg = bdev_get_queue(bio->bi_bdev)->root_blkg; + blkg_get(bio->bi_bdev->bd_disk->root_blkg); + bio->bi_blkg = bio->bi_bdev->bd_disk->root_blkg; } } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); |