diff --git a/block/bio-integrity.c b/block/bio-integrity.c index d25114715459..0827b19820c5 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -373,7 +373,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); - bip->bip_iter.bi_sector += bytes_done >> 9; + bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9); bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } diff --git a/block/bio.c b/block/bio.c index cd38db6651d2..6c916766868c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -569,7 +569,8 @@ static void bio_truncate(struct bio *bio, unsigned new_size) offset = new_size - done; else offset = 0; - zero_user(bv.bv_page, offset, bv.bv_len - offset); + zero_user(bv.bv_page, bv.bv_offset + offset, + bv.bv_len - offset); truncated = true; } done += bv.bv_len; diff --git a/block/blk-core.c b/block/blk-core.c index 6d74c1fc5d70..f35c5fa9eabd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -864,17 +864,21 @@ end_io: static void __submit_bio_fops(struct gendisk *disk, struct bio *bio) { - if (unlikely(bio_queue_enter(bio) != 0)) - return; - if (submit_bio_checks(bio) && blk_crypto_bio_prep(&bio)) - disk->fops->submit_bio(bio); - blk_queue_exit(disk->queue); + if (blk_crypto_bio_prep(&bio)) { + if (likely(bio_queue_enter(bio) == 0)) { + disk->fops->submit_bio(bio); + blk_queue_exit(disk->queue); + } + } } static void __submit_bio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; + if (unlikely(!submit_bio_checks(bio))) + return; + if (!disk->fops->submit_bio) blk_mq_submit_bio(bio); else diff --git a/block/blk-flush.c b/block/blk-flush.c index 1fce6d16e6d3..b5b4a26ef092 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -235,8 +235,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) * avoiding use-after-free. */ WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE); - if (fq->rq_status != BLK_STS_OK) + if (fq->rq_status != BLK_STS_OK) { error = fq->rq_status; + fq->rq_status = BLK_STS_OK; + } if (!q->elevator) { flush_rq->tag = BLK_MQ_NO_TAG; diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index b925f3db3ab7..18c68d8b9138 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -144,7 +144,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk, &q->kobj, "%s", "independent_access_ranges"); if (ret) { q->ia_ranges = NULL; - kfree(iars); + kobject_put(&iars->kobj); return ret; } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a5b37cc65b17..769b64394298 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2311,7 +2311,14 @@ static void ioc_timer_fn(struct timer_list *timer) hwm = current_hweight_max(iocg); new_hwi = hweight_after_donation(iocg, old_hwi, hwm, usage, &now); - if (new_hwi < hwm) { + /* + * Donation calculation assumes hweight_after_donation + * to be positive, a condition that a donor w/ hwa < 2 + * can't meet. Don't bother with donation if hwa is + * below 2. It's not gonna make a meaningful difference + * anyway. + */ + if (new_hwi < hwm && hwa >= 2) { iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); diff --git a/block/blk-merge.c b/block/blk-merge.c index 893c1a60b701..ba761c3f482b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -1067,7 +1067,6 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q, * @q: request_queue new bio is being queued at * @bio: new bio being queued * @nr_segs: number of segments in @bio - * @same_queue_rq: output value, will be true if there's an existing request * from the passed in @q already in the plug list * * Determine whether @bio being queued on @q can be merged with the previous @@ -1084,7 +1083,7 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q, * Caller must ensure !blk_queue_nomerges(q) beforehand. */ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int nr_segs, bool *same_queue_rq) + unsigned int nr_segs) { struct blk_plug *plug; struct request *rq; @@ -1096,12 +1095,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, /* check the previously added entry for a quick merge attempt */ rq = rq_list_peek(&plug->mq_list); if (rq->q == q) { - /* - * Only blk-mq multiple hardware queues case checks the rq in - * the same queue, there should be only one such rq in a queue - */ - *same_queue_rq = true; - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK) return true; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 995336abee33..af38be47ce68 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -16,6 +16,21 @@ #include "blk-mq-sched.h" #include "blk-mq-tag.h" +/* + * Recalculate wakeup batch when tag is shared by hctx. + */ +static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, + unsigned int users) +{ + if (!users) + return; + + sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags, + users); + sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags, + users); +} + /* * If a previously inactive queue goes active, bump the active user count. * We need to do this before try to allocate driver tag, then even if fail @@ -24,18 +39,26 @@ */ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { + unsigned int users; + if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; - if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) && - !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) - atomic_inc(&hctx->tags->active_queues); + if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) || + test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) { + return true; + } } else { - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && - !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - atomic_inc(&hctx->tags->active_queues); + if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) || + test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) { + return true; + } } + users = atomic_inc_return(&hctx->tags->active_queues); + + blk_mq_update_wake_batch(hctx->tags, users); + return true; } @@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { struct blk_mq_tags *tags = hctx->tags; + unsigned int users; if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; @@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) return; } - atomic_dec(&tags->active_queues); + users = atomic_dec_return(&tags->active_queues); + + blk_mq_update_wake_batch(tags, users); blk_mq_tag_wakeup_all(tags, false); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 695dc12282d9..a1092c7e6ab2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2497,11 +2497,10 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) } static bool blk_mq_attempt_bio_merge(struct request_queue *q, - struct bio *bio, unsigned int nr_segs, - bool *same_queue_rq) + struct bio *bio, unsigned int nr_segs) { if (!blk_queue_nomerges(q) && bio_mergeable(bio)) { - if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq)) + if (blk_attempt_plug_merge(q, bio, nr_segs)) return true; if (blk_mq_sched_bio_merge(q, bio, nr_segs)) return true; @@ -2511,9 +2510,7 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q, static struct request *blk_mq_get_new_requests(struct request_queue *q, struct blk_plug *plug, - struct bio *bio, - unsigned int nsegs, - bool *same_queue_rq) + struct bio *bio) { struct blk_mq_alloc_data data = { .q = q, @@ -2522,11 +2519,9 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, }; struct request *rq; - if (blk_mq_attempt_bio_merge(q, bio, nsegs, same_queue_rq)) + if (unlikely(bio_queue_enter(bio))) return NULL; - rq_qos_throttle(q, bio); - if (plug) { data.nr_tags = plug->nr_ios; plug->nr_ios = 1; @@ -2534,68 +2529,40 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, } rq = __blk_mq_alloc_requests(&data); - if (rq) - return rq; + if (!rq) + goto fail; + return rq; +fail: rq_qos_cleanup(q, bio); if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); - - return NULL; -} - -static inline bool blk_mq_can_use_cached_rq(struct request *rq, struct bio *bio) -{ - if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type) - return false; - - if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf)) - return false; - - return true; -} - -static inline struct request *blk_mq_get_request(struct request_queue *q, - struct blk_plug *plug, - struct bio *bio, - unsigned int nsegs, - bool *same_queue_rq) -{ - struct request *rq; - bool checked = false; - - if (plug) { - rq = rq_list_peek(&plug->cached_rq); - if (rq && rq->q == q) { - if (unlikely(!submit_bio_checks(bio))) - return NULL; - if (blk_mq_attempt_bio_merge(q, bio, nsegs, - same_queue_rq)) - return NULL; - checked = true; - if (!blk_mq_can_use_cached_rq(rq, bio)) - goto fallback; - rq->cmd_flags = bio->bi_opf; - plug->cached_rq = rq_list_next(rq); - INIT_LIST_HEAD(&rq->queuelist); - rq_qos_throttle(q, bio); - return rq; - } - } - -fallback: - if (unlikely(bio_queue_enter(bio))) - return NULL; - if (unlikely(!checked && !submit_bio_checks(bio))) - goto out_put; - rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq); - if (rq) - return rq; -out_put: blk_queue_exit(q); return NULL; } +static inline struct request *blk_mq_get_cached_request(struct request_queue *q, + struct blk_plug *plug, struct bio *bio) +{ + struct request *rq; + + if (!plug) + return NULL; + rq = rq_list_peek(&plug->cached_rq); + if (!rq || rq->q != q) + return NULL; + + if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type) + return NULL; + if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf)) + return NULL; + + rq->cmd_flags = bio->bi_opf; + plug->cached_rq = rq_list_next(rq); + INIT_LIST_HEAD(&rq->queuelist); + return rq; +} + /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. @@ -2612,10 +2579,9 @@ out_put: void blk_mq_submit_bio(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct blk_plug *plug = blk_mq_plug(q, bio); const int is_sync = op_is_sync(bio->bi_opf); struct request *rq; - struct blk_plug *plug; - bool same_queue_rq = false; unsigned int nr_segs = 1; blk_status_t ret; @@ -2629,11 +2595,18 @@ void blk_mq_submit_bio(struct bio *bio) if (!bio_integrity_prep(bio)) return; - plug = blk_mq_plug(q, bio); - rq = blk_mq_get_request(q, plug, bio, nr_segs, &same_queue_rq); - if (unlikely(!rq)) + if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) return; + rq_qos_throttle(q, bio); + + rq = blk_mq_get_cached_request(q, plug, bio); + if (!rq) { + rq = blk_mq_get_new_requests(q, plug, bio); + if (unlikely(!rq)) + return; + } + trace_block_getrq(bio); rq_qos_track(q, rq, bio); @@ -2653,16 +2626,7 @@ void blk_mq_submit_bio(struct bio *bio) return; } - if (plug && (q->nr_hw_queues == 1 || - blk_mq_is_shared_tags(rq->mq_hctx->flags) || - q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) { - /* - * Use plugging if we have a ->commit_rqs() hook as well, as - * we know the driver uses bd->last in a smart fashion. - * - * Use normal plugging if this disk is slow HDD, as sequential - * IO may benefit a lot from plug merging. - */ + if (plug) { unsigned int request_count = plug->rq_count; struct request *last = NULL; @@ -2680,40 +2644,12 @@ void blk_mq_submit_bio(struct bio *bio) } blk_add_rq_to_plug(plug, rq); - } else if (rq->rq_flags & RQF_ELV) { - /* Insert the request at the IO scheduler queue */ + } else if ((rq->rq_flags & RQF_ELV) || + (rq->mq_hctx->dispatch_busy && + (q->nr_hw_queues == 1 || !is_sync))) { blk_mq_sched_insert_request(rq, false, true, true); - } else if (plug && !blk_queue_nomerges(q)) { - struct request *next_rq = NULL; - - /* - * We do limited plugging. If the bio can be merged, do that. - * Otherwise the existing request in the plug list will be - * issued. So the plug list will have one request at most - * The plug list might get flushed before this. If that happens, - * the plug list is empty, and same_queue_rq is invalid. - */ - if (same_queue_rq) { - next_rq = rq_list_pop(&plug->mq_list); - plug->rq_count--; - } - blk_add_rq_to_plug(plug, rq); - trace_block_plug(q); - - if (next_rq) { - trace_block_unplug(q, 1, true); - blk_mq_try_issue_directly(next_rq->mq_hctx, next_rq); - } - } else if ((q->nr_hw_queues > 1 && is_sync) || - !rq->mq_hctx->dispatch_busy) { - /* - * There is no scheduler and we can try to send directly - * to the hardware. - */ - blk_mq_try_issue_directly(rq->mq_hctx, rq); } else { - /* Default case. */ - blk_mq_sched_insert_request(rq, false, true, true); + blk_mq_try_issue_directly(rq->mq_hctx, rq); } } diff --git a/block/blk.h b/block/blk.h index ccde6e6f1736..722036471ced 100644 --- a/block/blk.h +++ b/block/blk.h @@ -253,7 +253,7 @@ void blk_add_timer(struct request *req); void blk_print_req_error(struct request *req, blk_status_t status); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int nr_segs, bool *same_queue_rq); + unsigned int nr_segs); bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio, unsigned int nr_segs); diff --git a/block/fops.c b/block/fops.c index 18d963bcb613..c953737dbae9 100644 --- a/block/fops.c +++ b/block/fops.c @@ -340,8 +340,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, } else { ret = bio_iov_iter_get_pages(bio, iter); if (unlikely(ret)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_put(bio); return ret; } } diff --git a/block/ioprio.c b/block/ioprio.c index 313c14a70bbd..6f01d35a5145 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -220,6 +220,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) pgrp = task_pgrp(current); else pgrp = find_vpid(who); + read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -229,6 +230,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) else ret = ioprio_best(ret, tmpio); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); + break; case IOPRIO_WHO_USER: uid = make_kuid(current_user_ns(), who); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 85d919bf60c7..3ed5eaf3446a 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -865,7 +865,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire); SHOW_INT(deadline_writes_starved_show, dd->writes_starved); SHOW_INT(deadline_front_merges_show, dd->front_merges); -SHOW_INT(deadline_async_depth_show, dd->front_merges); +SHOW_INT(deadline_async_depth_show, dd->async_depth); SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); #undef SHOW_INT #undef SHOW_JIFFIES @@ -895,7 +895,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX); STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); -STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX); +STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX); STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); #undef STORE_FUNCTION #undef STORE_INT diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 323af5c9c802..e23aac1a83f7 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -340,9 +340,9 @@ static int nullb_update_nr_hw_queues(struct nullb_device *dev, return 0; /* - * Make sure at least one queue exists for each of submit and poll. + * Make sure at least one submit queue exists. */ - if (!submit_queues || !poll_queues) + if (!submit_queues) return -EINVAL; /* @@ -1891,7 +1891,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) if (g_shared_tag_bitmap) set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; set->driver_data = nullb; - if (g_poll_queues) + if (poll_queues) set->nr_maps = 3; else set->nr_maps = 1; @@ -1918,8 +1918,6 @@ static int null_validate_conf(struct nullb_device *dev) if (dev->poll_queues > g_poll_queues) dev->poll_queues = g_poll_queues; - else if (dev->poll_queues == 0) - dev->poll_queues = 1; dev->prev_poll_queues = dev->poll_queues; dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4a6ff274335a..4dfa7bc8151e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -406,6 +406,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) sbitmap_free(&sbq->sb); } +/** + * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch + * @sbq: Bitmap queue to recalculate wake batch. + * @users: Number of shares. + * + * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch + * by depth. This interface is for HCTX shared tags or queue shared tags. + */ +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, + unsigned int users); + /** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2709ab825499..09d293c30fd2 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, - unsigned int depth) +static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int wake_batch) { - unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); int i; if (sbq->wake_batch != wake_batch) { @@ -476,6 +475,30 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, } } +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) +{ + unsigned int wake_batch; + + wake_batch = sbq_calc_wake_batch(sbq, depth); + __sbitmap_queue_update_wake_batch(sbq, wake_batch); +} + +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, + unsigned int users) +{ + unsigned int wake_batch; + unsigned int min_batch; + unsigned int depth = (sbq->sb.depth + users - 1) / users; + + min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1; + + wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, + min_batch, SBQ_WAKE_BATCH); + __sbitmap_queue_update_wake_batch(sbq, wake_batch); +} +EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); + void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { sbitmap_queue_update_wake_batch(sbq, depth);