From ab42f35d9cb5ac49b5a2a11f940e74f58f207280 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 May 2017 19:53:19 +0800 Subject: blk-mq: merge bio into sw queue before plugging Before blk-mq was introduced, I/O was merged into the elevator before being put into the plug queue, but blk-mq changed the order and made merging into the sw queue basically impossible. It was then observed that sequential I/O throughput degrades by about 10%~20% on virtio-blk in the test[1] if mq-deadline isn't used. This patch moves the per-sw-queue bio merging before plugging, like what blk_queue_bio() does, and the performance regression is fixed in this situation. [1]. test script: sudo fio --direct=1 --size=128G --bsrange=4k-4k --runtime=40 --numjobs=16 --ioengine=libaio --iodepth=64 --group_reporting=1 --filename=/dev/vdb --name=virtio_blk-test-$RW --rw=$RW --output-format=json RW=read or write Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index f2224ffd225d..fd8244cf50a4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1427,30 +1427,30 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx) !blk_queue_nomerges(hctx->queue); } -static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, - struct request *rq, struct bio *bio) +/* attempt to merge bio into current sw queue */ +static inline bool blk_mq_merge_bio(struct request_queue *q, struct bio *bio) { - if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) { - blk_mq_bio_to_request(rq, bio); - spin_lock(&ctx->lock); -insert_rq: - __blk_mq_insert_request(hctx, rq, false); - spin_unlock(&ctx->lock); - return false; - } else { - struct request_queue *q = hctx->queue; + bool ret = false; + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + if (hctx_allow_merges(hctx) && bio_mergeable(bio)) { spin_lock(&ctx->lock); - if (!blk_mq_attempt_merge(q, ctx, bio)) { - blk_mq_bio_to_request(rq, bio); - goto insert_rq; - } - + ret = blk_mq_attempt_merge(q, ctx, bio); spin_unlock(&ctx->lock); - __blk_mq_finish_request(hctx, ctx, rq); - return true; } + + blk_mq_put_ctx(ctx); + return ret; +} + +static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx, + struct request *rq) +{ + spin_lock(&ctx->lock); + __blk_mq_insert_request(hctx, rq, false); + spin_unlock(&ctx->lock); } static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) @@ -1549,6 +1549,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (blk_mq_sched_bio_merge(q, bio)) return BLK_QC_T_NONE; + if (blk_mq_merge_bio(q, bio)) + return BLK_QC_T_NONE; + wb_acct = wbt_wait(q->rq_wb, bio, NULL); trace_block_getrq(q, bio, bio->bi_opf); @@ -1630,11 +1633,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); blk_mq_sched_insert_request(rq, false, true, true, true); - } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { + } else { blk_mq_put_ctx(data.ctx); + blk_mq_bio_to_request(rq, bio); + blk_mq_queue_io(data.hctx, data.ctx, rq); blk_mq_run_hw_queue(data.hctx, true); - } else - blk_mq_put_ctx(data.ctx); + } return cookie; } -- cgit v1.2.3 From 9bddeb2a5b981507cbe2d7bdb545c32f204109c7 Mon Sep 17 00:00:00 2001 From: 
Ming Lei Date: Fri, 26 May 2017 19:53:20 +0800 Subject: blk-mq: make per-sw-queue bio merge as default .bio_merge Because the per-sw-queue bio merge does basically the same thing as a scheduler's .bio_merge(), this patch makes the per-sw-queue bio merge the default .bio_merge if no scheduler is used or the I/O scheduler doesn't provide .bio_merge(). Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++---- block/blk-mq-sched.h | 4 +--- block/blk-mq.c | 64 ---------------------------------------------------- 3 files changed, 58 insertions(+), 72 deletions(-) (limited to 'block') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1f5b692526ae..c4e2afb9d12d 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -221,19 +221,71 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, } EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); +/* + * Reverse check our software queue for entries that we could potentially + * merge with. Currently includes a hand-wavy stop count of 8, to not spend + * too much time checking for merges. + */ +static bool blk_mq_attempt_merge(struct request_queue *q, + struct blk_mq_ctx *ctx, struct bio *bio) +{ + struct request *rq; + int checked = 8; + + list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { + bool merged = false; + + if (!checked--) + break; + + if (!blk_rq_merge_ok(rq, bio)) + continue; + + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_back_merge(q, rq, bio); + break; + case ELEVATOR_FRONT_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_front_merge(q, rq, bio); + break; + case ELEVATOR_DISCARD_MERGE: + merged = bio_attempt_discard_merge(q, rq, bio); + break; + default: + continue; + } + + if (merged) + ctx->rq_merged++; + return merged; + } + + return false; +} + bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + bool ret = false; - if (e->type->ops.mq.bio_merge) { - struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - + if (e && e->type->ops.mq.bio_merge) { blk_mq_put_ctx(ctx); return e->type->ops.mq.bio_merge(hctx, bio); } - return false; + if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) { + /* default per sw-queue merge */ + spin_lock(&ctx->lock); + ret = blk_mq_attempt_merge(q, ctx, bio); + spin_unlock(&ctx->lock); + } + + blk_mq_put_ctx(ctx); + return ret; } bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index edafb5383b7b..b87e5be5db8c 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -38,9 +38,7 @@ int blk_mq_sched_init(struct request_queue *q); static inline bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { - struct elevator_queue *e = q->elevator; - - if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio)) + if (blk_queue_nomerges(q) || !bio_mergeable(bio)) return false; return __blk_mq_sched_bio_merge(q, bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index fd8244cf50a4..22438d5036a3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -753,50 +753,6 @@ static void blk_mq_timeout_work(struct work_struct *work) blk_queue_exit(q); } -/* - * Reverse check our software queue for entries that we 
could potentially - * merge with. Currently includes a hand-wavy stop count of 8, to not spend - * too much time checking for merges. - */ -static bool blk_mq_attempt_merge(struct request_queue *q, - struct blk_mq_ctx *ctx, struct bio *bio) -{ - struct request *rq; - int checked = 8; - - list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { - bool merged = false; - - if (!checked--) - break; - - if (!blk_rq_merge_ok(rq, bio)) - continue; - - switch (blk_try_merge(rq, bio)) { - case ELEVATOR_BACK_MERGE: - if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_back_merge(q, rq, bio); - break; - case ELEVATOR_FRONT_MERGE: - if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_front_merge(q, rq, bio); - break; - case ELEVATOR_DISCARD_MERGE: - merged = bio_attempt_discard_merge(q, rq, bio); - break; - default: - continue; - } - - if (merged) - ctx->rq_merged++; - return merged; - } - - return false; -} - struct flush_busy_ctx_data { struct blk_mq_hw_ctx *hctx; struct list_head *list; @@ -1427,23 +1383,6 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx) !blk_queue_nomerges(hctx->queue); } -/* attempt to merge bio into current sw queue */ -static inline bool blk_mq_merge_bio(struct request_queue *q, struct bio *bio) -{ - bool ret = false; - struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - - if (hctx_allow_merges(hctx) && bio_mergeable(bio)) { - spin_lock(&ctx->lock); - ret = blk_mq_attempt_merge(q, ctx, bio); - spin_unlock(&ctx->lock); - } - - blk_mq_put_ctx(ctx); - return ret; -} - static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq) @@ -1549,9 +1488,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (blk_mq_sched_bio_merge(q, bio)) return BLK_QC_T_NONE; - if (blk_mq_merge_bio(q, bio)) - return BLK_QC_T_NONE; - wb_acct = wbt_wait(q->rq_wb, bio, NULL); trace_block_getrq(q, bio, bio->bi_opf); -- cgit v1.2.3 From 03ea8ad78cfb2910862c8dfcd2a627fc04097db2 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 26 May 2017 14:22:37 -0700 Subject: cfq-iosched: Delete unused function min_vdisktime() This fixes the following warning when building with clang: block/cfq-iosched.c:970:19: error: unused function 'min_vdisktime' [-Werror,-Wunused-function] Signed-off-by: Matthias Kaehlcke Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index da69b079725f..f57bc7d5c483 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -978,15 +978,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) return min_vdisktime; } -static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) -{ - s64 delta = (s64)(vdisktime - min_vdisktime); - if (delta < 0) - min_vdisktime = vdisktime; - - return min_vdisktime; -} - static void update_min_vdisktime(struct cfq_rb_root *st) { struct cfq_group *cfqg; -- cgit v1.2.3 From c0cb1c6d39060ce04470b10347b7b6f1df77bef5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 1 Jun 2017 08:55:10 -0700 Subject: blk-mq-debugfs: Show atomic request flags When analyzing e.g. queue lockups it is important to know whether or not a request has already been started. Hence also show the atomic request flags. 
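As a rough sketch of the effect (the device name, request address and tag values below are hypothetical), a request dumped through one of the existing per-hctx debugfs files such as 'dispatch' would then carry the extra field, roughly: $ cat /sys/kernel/debug/block/vdb/hctx0/dispatch ffff880157f2a000 {.op=READ, .cmd_flags=, .rq_flags=STARTED|IO_STAT, .atomic_flags=STARTED, .tag=7, .internal_tag=-1} 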
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: Eduardo Valentin Cc: Omar Sandoval Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 803aed4d7221..d56ddd7a1285 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -267,6 +267,14 @@ static const char *const rqf_name[] = { }; #undef RQF_NAME +#define RQAF_NAME(name) [REQ_ATOM_##name] = #name +static const char *const rqaf_name[] = { + RQAF_NAME(COMPLETE), + RQAF_NAME(STARTED), + RQAF_NAME(POLL_SLEPT), +}; +#undef RQAF_NAME + int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; @@ -283,6 +291,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) seq_puts(m, ", .rq_flags="); blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, ARRAY_SIZE(rqf_name)); + seq_puts(m, ", .atomic_flags="); + blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name)); seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, rq->internal_tag); if (mq_ops->show_rq) -- cgit v1.2.3 From 8ef1a191038c138d5675933cd69d47747d0d396b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 1 Jun 2017 08:55:11 -0700 Subject: blk-mq-debugfs: Show requeue list When checking whether or not a blk-mq driver forgot to kick the requeue list after having requeued a request, it is important to be able to inspect the contents of the requeue list. Hence export that list through debugfs. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: Eduardo Valentin Cc: Christoph Hellwig Cc: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index d56ddd7a1285..8b06a12c1461 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -308,6 +308,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) } EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); +static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos) + __acquires(&q->requeue_lock) +{ + struct request_queue *q = m->private; + + spin_lock_irq(&q->requeue_lock); + return seq_list_start(&q->requeue_list, *pos); +} + +static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct request_queue *q = m->private; + + return seq_list_next(v, &q->requeue_list, pos); +} + +static void queue_requeue_list_stop(struct seq_file *m, void *v) + __releases(&q->requeue_lock) +{ + struct request_queue *q = m->private; + + spin_unlock_irq(&q->requeue_lock); +} + +static const struct seq_operations queue_requeue_list_seq_ops = { + .start = queue_requeue_list_start, + .next = queue_requeue_list_next, + .stop = queue_requeue_list_stop, + .show = blk_mq_debugfs_rq_show, +}; + static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) __acquires(&hctx->lock) { @@ -665,6 +696,7 @@ const struct file_operations blk_mq_debugfs_fops = { static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { {"poll_stat", 0400, queue_poll_stat_show}, + {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops}, {"state", 0600, queue_state_show, queue_state_write}, {}, }; -- cgit v1.2.3 From 2720bab50258782573df0f536681bece11e784f0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche 
Date: Thu, 1 Jun 2017 08:55:12 -0700 Subject: blk-mq-debugfs: Show busy requests Requests that got stuck in a block driver are neither on blk_mq_ctx.rq_list nor on any hw dispatch queue. Make these visible in debugfs through the "busy" attribute. Signed-off-by: Bart Van Assche Reviewed-by: Eduardo Valentin Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 8b06a12c1461..90c454bbaf92 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -370,6 +370,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = { .show = blk_mq_debugfs_rq_show, }; +struct show_busy_params { + struct seq_file *m; + struct blk_mq_hw_ctx *hctx; +}; + +/* + * Note: the state of a request may change while this function is in progress, + * e.g. due to a concurrent blk_mq_finish_request() call. + */ +static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) +{ + const struct show_busy_params *params = data; + + if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx && + test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + __blk_mq_debugfs_rq_show(params->m, + list_entry_rq(&rq->queuelist)); +} + +static int hctx_busy_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct show_busy_params params = { .m = m, .hctx = hctx }; + + blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq, + &params); + + return 0; +} + static int hctx_ctx_map_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; @@ -705,6 +735,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { {"state", 0400, hctx_state_show}, {"flags", 0400, hctx_flags_show}, {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, + {"busy", 0400, hctx_busy_show}, {"ctx_map", 0400, hctx_ctx_map_show}, {"tags", 0400, hctx_tags_show}, {"tags_bitmap", 0400, hctx_tags_bitmap_show}, -- cgit v1.2.3 From edea55abb86ff67afda96120d6254290e7a75d9e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 1 Jun 2017 08:55:13 -0700 Subject: blk-mq-debugfs: Add 'kick' operation Running a queue causes the block layer to examine the per-CPU and hw queues but not the requeue list. Hence add a 'kick' operation that also examines the requeue list. 
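A usage sketch (the device name is hypothetical): together with the requeue_list export above, a stuck queue can then be inspected and kicked from user space: # cat /sys/kernel/debug/block/vdb/requeue_list # echo kick > /sys/kernel/debug/block/vdb/state 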
Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Eduardo Valentin Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 90c454bbaf92..9edebbdce0bd 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf, blk_mq_run_hw_queues(q, true); } else if (strcmp(op, "start") == 0) { blk_mq_start_stopped_hw_queues(q, true); + } else if (strcmp(op, "kick") == 0) { + blk_mq_kick_requeue_list(q); } else { pr_err("%s: unsupported operation '%s'\n", __func__, op); inval: - pr_err("%s: use either 'run' or 'start'\n", __func__); + pr_err("%s: use 'run', 'start' or 'kick'\n", __func__); return -EINVAL; } return count; -- cgit v1.2.3 From 9efc160f4bbd69b17b48edec53067537d04e62b7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 31 May 2017 14:43:46 -0700 Subject: block: Introduce queue flag QUEUE_FLAG_SCSI_PASSTHROUGH From the context where a SCSI command is submitted it is not always possible to figure out whether or not the queue the command is submitted to has struct scsi_request as the first member of its private data. Hence introduce the flag QUEUE_FLAG_SCSI_PASSTHROUGH. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Cc: Omar Sandoval Cc: Don Brace Signed-off-by: Jens Axboe --- block/bsg-lib.c | 1 + drivers/block/cciss.c | 1 + drivers/ide/ide-probe.c | 1 + drivers/scsi/scsi_lib.c | 2 ++ drivers/scsi/scsi_transport_sas.c | 1 + include/linux/blkdev.h | 3 +++ 6 files changed, 9 insertions(+) (limited to 'block') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 0a23dbba2d30..9b91daefcd9b 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); blk_queue_softirq_done(q, bsg_softirq_done); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index cd375503f7b0..3761066fe89d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1956,6 +1956,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->queue->cmd_size = sizeof(struct scsi_request); disk->queue->request_fn = do_cciss_request; disk->queue->queue_lock = &h->lock; + queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue); if (blk_init_allocated_queue(disk->queue) < 0) goto cleanup_queue; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 023562565d11..b3f85250dea9 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -773,6 +773,7 @@ static int ide_init_queue(ide_drive_t *drive) q->request_fn = do_ide_request; q->init_rq_fn = ide_init_rq; q->cmd_size = sizeof(struct ide_request); + queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); return 1; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 99e16ac479e3..884aaa84c2dd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2057,6 +2057,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) { struct device *dev = 
shost->dma_dev; + queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + /* * this limit is imposed by hardware restrictions */ diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 0ebe2f1bb908..d16414bfe2ef 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -264,6 +264,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) q->queuedata = shost; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); return 0; out_cleanup_queue: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ab92c4ea138b..019f18c65098 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -618,6 +618,7 @@ struct request_queue { #define QUEUE_FLAG_STATS 27 /* track rq completion times */ #define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ +#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -708,6 +709,8 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_secure_erase(q) \ (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) +#define blk_queue_scsi_passthrough(q) \ + test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ -- cgit v1.2.3 From d9f972644606ecc2581390b43cb5a980b54c04bf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 31 May 2017 14:43:47 -0700 Subject: bsg: Check queue type before attaching to a queue Since BSG only supports request queues for which struct scsi_request is the first member of their private request data, refuse to register block layer queues for which struct scsi_request is not the first member of their private data. References: commit bd1599d931ca ("scsi_transport_sas: fix BSG ioctl memory corruption") References: commit 82ed4db499b8 ("block: split scsi_request out of struct request") Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Omar Sandoval Signed-off-by: Jens Axboe --- block/bsg.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'block') diff --git a/block/bsg.c b/block/bsg.c index 6fd08544d77e..40db8ff4c618 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -750,6 +750,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode, #ifdef BSG_DEBUG unsigned char buf[32]; #endif + + if (!blk_queue_scsi_passthrough(rq)) { + WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); + return ERR_PTR(-EINVAL); + } + if (!blk_get_queue(rq)) return ERR_PTR(-ENXIO); -- cgit v1.2.3 From 2a842acab109f40f0d7d10b38e9ca88390628996 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:38:04 +0200 Subject: block: introduce new block status code type Currently we use normal Linux errno values in the block layer, and while we accept any error, a few have overloaded magic meanings. This patch instead introduces a new blk_status_t value that holds block layer specific status codes and explicitly explains their meaning. Helpers to convert from and to the previous special meanings are provided for now, but I suspect we want to get rid of them in the long run - those drivers that have an errno input (e.g. 
networking) usually get errnos that don't know about the special block layer overloads, and similarly returning them to userspace will usually return something that strictly speaking isn't correct for file system operations, but that's left as an exercise for later. For now the set of errors is a very limited set that closely corresponds to the previous overloaded errno values, but there is some low-hanging fruit to improve it. blk_status_t (ab)uses the sparse __bitwise annotations to allow for sparse typechecking, so that we can easily catch places passing the wrong values. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/s390/include/asm/eadm.h | 6 +- arch/um/drivers/ubd_kern.c | 2 +- block/blk-core.c | 156 +++++++++++++++++++++--------------- block/blk-exec.c | 4 +- block/blk-flush.c | 8 +- block/blk-mq.c | 8 +- block/bsg-lib.c | 4 +- block/bsg.c | 6 +- drivers/block/DAC960.c | 2 +- drivers/block/amiflop.c | 10 +-- drivers/block/aoe/aoecmd.c | 2 +- drivers/block/ataflop.c | 16 ++-- drivers/block/cciss.c | 3 +- drivers/block/floppy.c | 4 +- drivers/block/loop.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 16 ++-- drivers/block/mtip32xx/mtip32xx.h | 2 +- drivers/block/nbd.c | 14 ++-- drivers/block/null_blk.c | 9 ++- drivers/block/paride/pcd.c | 8 +- drivers/block/paride/pd.c | 2 +- drivers/block/paride/pf.c | 18 ++--- drivers/block/ps3disk.c | 11 +-- drivers/block/rbd.c | 8 +- drivers/block/skd_main.c | 31 ++++--- drivers/block/sunvdc.c | 4 +- drivers/block/swim.c | 6 +- drivers/block/swim3.c | 26 +++--- drivers/block/sx8.c | 20 ++--- drivers/block/virtio_blk.c | 10 +-- drivers/block/xen-blkfront.c | 16 ++-- drivers/block/xsysace.c | 8 +- drivers/block/z2ram.c | 4 +- drivers/cdrom/gdrom.c | 9 ++- drivers/ide/ide-atapi.c | 9 ++- drivers/ide/ide-cd.c | 10 +-- drivers/ide/ide-dma.c | 2 +- drivers/ide/ide-eh.c | 16 ++-- drivers/ide/ide-floppy.c | 6 +- drivers/ide/ide-io.c | 10 +-- drivers/ide/ide-pm.c | 6 +- drivers/ide/ide-tape.c | 2 +- drivers/ide/ide-taskfile.c | 6 +- drivers/ide/siimage.c | 6 +- drivers/md/dm-mpath.c | 27 +++---- drivers/md/dm-rq.c | 20 ++--- drivers/md/dm-rq.h | 2 +- drivers/memstick/core/ms_block.c | 7 +- drivers/memstick/core/mspro_block.c | 8 +- drivers/mmc/core/block.c | 37 +++++---- drivers/mmc/core/queue.c | 2 +- drivers/mtd/mtd_blkdevs.c | 30 ++++--- drivers/mtd/ubi/block.c | 2 +- drivers/nvme/host/core.c | 29 +++---- drivers/nvme/host/lightnvm.c | 2 +- drivers/nvme/host/pci.c | 8 +- drivers/s390/block/dasd.c | 36 +++++---- drivers/s390/block/scm_blk.c | 8 +- drivers/s390/block/scm_blk.h | 4 +- drivers/s390/cio/eadm_sch.c | 6 +- drivers/s390/cio/scm.c | 2 +- drivers/sbus/char/jsflash.c | 4 +- drivers/scsi/osd/osd_initiator.c | 20 ++--- drivers/scsi/osst.c | 2 +- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 51 ++++-------- drivers/scsi/scsi_transport_sas.c | 2 +- drivers/scsi/sg.c | 6 +- drivers/scsi/st.c | 2 +- drivers/target/target_core_pscsi.c | 4 +- include/linux/blk-mq.h | 4 +- include/linux/blk_types.h | 16 ++++ include/linux/blkdev.h | 21 ++--- include/linux/device-mapper.h | 2 +- include/linux/ide.h | 6 +- include/scsi/osd_initiator.h | 2 +- 76 files changed, 474 insertions(+), 428 deletions(-) (limited to 'block') diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index 67026300c88e..144809a3f4f6 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -3,6 +3,7 @@ #include #include +#include struct arqb { u64 data; @@ -105,13 +106,14 @@ struct scm_driver { int (*probe) (struct 
scm_device *scmdev); int (*remove) (struct scm_device *scmdev); void (*notify) (struct scm_device *scmdev, enum scm_event event); - void (*handler) (struct scm_device *scmdev, void *data, int error); + void (*handler) (struct scm_device *scmdev, void *data, + blk_status_t error); }; int scm_driver_register(struct scm_driver *scmdrv); void scm_driver_unregister(struct scm_driver *scmdrv); int eadm_start_aob(struct aob *aob); -void scm_irq_handler(struct aob *aob, int error); +void scm_irq_handler(struct aob *aob, blk_status_t error); #endif /* _ASM_S390_EADM_H */ diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 85410279beab..b55fe9bf5d3e 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -534,7 +534,7 @@ static void ubd_handler(void) for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { blk_end_request( (*irq_req_buffer)[count]->req, - 0, + BLK_STS_OK, (*irq_req_buffer)[count]->length ); kfree((*irq_req_buffer)[count]); diff --git a/block/blk-core.c b/block/blk-core.c index c7068520794b..e942a9f814c7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -129,11 +129,66 @@ void blk_rq_init(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(blk_rq_init); +static const struct { + int errno; + const char *name; +} blk_errors[] = { + [BLK_STS_OK] = { 0, "" }, + [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" }, + [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" }, + [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" }, + [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" }, + [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" }, + [BLK_STS_NEXUS] = { -EBADE, "critical nexus" }, + [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, + [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, + [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, + + /* everything else not covered above: */ + [BLK_STS_IOERR] = { -EIO, "I/O" }, +}; + +blk_status_t errno_to_blk_status(int errno) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(blk_errors); i++) { + if (blk_errors[i].errno == errno) + return (__force blk_status_t)i; + } + + return BLK_STS_IOERR; +} +EXPORT_SYMBOL_GPL(errno_to_blk_status); + +int blk_status_to_errno(blk_status_t status) +{ + int idx = (__force int)status; + + if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors))) + return -EIO; + return blk_errors[idx].errno; +} +EXPORT_SYMBOL_GPL(blk_status_to_errno); + +static void print_req_error(struct request *req, blk_status_t status) +{ + int idx = (__force int)status; + + if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors))) + return; + + printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", + __func__, blk_errors[idx].name, req->rq_disk ? 
+ req->rq_disk->disk_name : "?", + (unsigned long long)blk_rq_pos(req)); +} + static void req_bio_endio(struct request *rq, struct bio *bio, - unsigned int nbytes, int error) + unsigned int nbytes, blk_status_t error) { if (error) - bio->bi_error = error; + bio->bi_error = blk_status_to_errno(error); if (unlikely(rq->rq_flags & RQF_QUIET)) bio_set_flag(bio, BIO_QUIET); @@ -2177,29 +2232,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, * @q: the queue to submit the request * @rq: the request being queued */ -int blk_insert_cloned_request(struct request_queue *q, struct request *rq) +blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned long flags; int where = ELEVATOR_INSERT_BACK; if (blk_cloned_rq_check_limits(q, rq)) - return -EIO; + return BLK_STS_IOERR; if (rq->rq_disk && should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) - return -EIO; + return BLK_STS_IOERR; if (q->mq_ops) { if (blk_queue_io_stat(q)) blk_account_io_start(rq, true); blk_mq_sched_insert_request(rq, false, true, false, false); - return 0; + return BLK_STS_OK; } spin_lock_irqsave(q->queue_lock, flags); if (unlikely(blk_queue_dying(q))) { spin_unlock_irqrestore(q->queue_lock, flags); - return -ENODEV; + return BLK_STS_IOERR; } /* @@ -2216,7 +2271,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - return 0; + return BLK_STS_OK; } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); @@ -2450,15 +2505,14 @@ struct request *blk_peek_request(struct request_queue *q) rq = NULL; break; } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { - int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO; - rq->rq_flags |= RQF_QUIET; /* * Mark this request as started so we don't trigger * any debug logic in the end I/O path. */ blk_start_request(rq); - __blk_end_request_all(rq, err); + __blk_end_request_all(rq, ret == BLKPREP_INVALID ? + BLK_STS_TARGET : BLK_STS_IOERR); } else { printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); break; @@ -2547,7 +2601,7 @@ EXPORT_SYMBOL(blk_fetch_request); /** * blk_update_request - Special helper function for request stacking drivers * @req: the request being processed - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete @req * * Description: @@ -2566,49 +2620,19 @@ EXPORT_SYMBOL(blk_fetch_request); * %false - this request doesn't have any more data * %true - this request has more data **/ -bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) +bool blk_update_request(struct request *req, blk_status_t error, + unsigned int nr_bytes) { int total_bytes; - trace_block_rq_complete(req, error, nr_bytes); + trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes); if (!req->bio) return false; - if (error && !blk_rq_is_passthrough(req) && - !(req->rq_flags & RQF_QUIET)) { - char *error_type; - - switch (error) { - case -ENOLINK: - error_type = "recoverable transport"; - break; - case -EREMOTEIO: - error_type = "critical target"; - break; - case -EBADE: - error_type = "critical nexus"; - break; - case -ETIMEDOUT: - error_type = "timeout"; - break; - case -ENOSPC: - error_type = "critical space allocation"; - break; - case -ENODATA: - error_type = "critical medium"; - break; - case -EIO: - default: - error_type = "I/O"; - break; - } - printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", - __func__, error_type, req->rq_disk ? 
- req->rq_disk->disk_name : "?", - (unsigned long long)blk_rq_pos(req)); - - } + if (unlikely(error && !blk_rq_is_passthrough(req) && + !(req->rq_flags & RQF_QUIET))) + print_req_error(req, error); blk_account_io_completion(req, nr_bytes); @@ -2674,7 +2698,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) } EXPORT_SYMBOL_GPL(blk_update_request); -static bool blk_update_bidi_request(struct request *rq, int error, +static bool blk_update_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { @@ -2715,7 +2739,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request); /* * queue lock must be held */ -void blk_finish_request(struct request *req, int error) +void blk_finish_request(struct request *req, blk_status_t error) { struct request_queue *q = req->q; @@ -2752,7 +2776,7 @@ EXPORT_SYMBOL(blk_finish_request); /** * blk_end_bidi_request - Complete a bidi request * @rq: the request to complete - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * @@ -2766,7 +2790,7 @@ EXPORT_SYMBOL(blk_finish_request); * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool blk_end_bidi_request(struct request *rq, int error, +static bool blk_end_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { struct request_queue *q = rq->q; @@ -2785,7 +2809,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, /** * __blk_end_bidi_request - Complete a bidi request with queue lock held * @rq: the request to complete - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * @@ -2797,7 +2821,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool __blk_end_bidi_request(struct request *rq, int error, +static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) @@ -2811,7 +2835,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error, /** * blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete * * Description: @@ -2822,7 +2846,8 @@ static bool __blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) +bool blk_end_request(struct request *rq, blk_status_t error, + unsigned int nr_bytes) { return blk_end_bidi_request(rq, error, nr_bytes, 0); } @@ -2831,12 +2856,12 @@ EXPORT_SYMBOL(blk_end_request); /** * blk_end_request_all - Helper function for drives to finish the request. * @rq: the request to finish - * @error: %0 for success, < %0 for error + * @error: block status code * * Description: * Completely finish @rq. 
*/ -void blk_end_request_all(struct request *rq, int error) +void blk_end_request_all(struct request *rq, blk_status_t error) { bool pending; unsigned int bidi_bytes = 0; @@ -2852,7 +2877,7 @@ EXPORT_SYMBOL(blk_end_request_all); /** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed - * @error: %0 for success, < %0 for error + * @error: block status code * @nr_bytes: number of bytes to complete * * Description: @@ -2862,7 +2887,8 @@ EXPORT_SYMBOL(blk_end_request_all); * %false - we are done with this request * %true - still buffers pending for this request **/ -bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) +bool __blk_end_request(struct request *rq, blk_status_t error, + unsigned int nr_bytes) { return __blk_end_bidi_request(rq, error, nr_bytes, 0); } @@ -2871,12 +2897,12 @@ EXPORT_SYMBOL(__blk_end_request); /** * __blk_end_request_all - Helper function for drives to finish the request. * @rq: the request to finish - * @error: %0 for success, < %0 for error + * @error: block status code * * Description: * Completely finish @rq. Must be called with queue lock held. */ -void __blk_end_request_all(struct request *rq, int error) +void __blk_end_request_all(struct request *rq, blk_status_t error) { bool pending; unsigned int bidi_bytes = 0; @@ -2892,7 +2918,7 @@ EXPORT_SYMBOL(__blk_end_request_all); /** * __blk_end_request_cur - Helper function to finish the current request chunk. * @rq: the request to finish the current chunk for - * @error: %0 for success, < %0 for error + * @error: block status code * * Description: * Complete the current consecutively mapped chunk from @rq. Must @@ -2902,7 +2928,7 @@ EXPORT_SYMBOL(__blk_end_request_all); * %false - we are done with this request * %true - still buffers pending for this request */ -bool __blk_end_request_cur(struct request *rq, int error) +bool __blk_end_request_cur(struct request *rq, blk_status_t error) { return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); } @@ -3243,7 +3269,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) * Short-circuit if @q is dead */ if (unlikely(blk_queue_dying(q))) { - __blk_end_request_all(rq, -ENODEV); + __blk_end_request_all(rq, BLK_STS_IOERR); continue; } diff --git a/block/blk-exec.c b/block/blk-exec.c index a9451e3b8587..5c0f3dc446dc 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -16,7 +16,7 @@ * @rq: request to complete * @error: end I/O status of the request */ -static void blk_end_sync_rq(struct request *rq, int error) +static void blk_end_sync_rq(struct request *rq, blk_status_t error) { struct completion *waiting = rq->end_io_data; @@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; - __blk_end_request_all(rq, -ENXIO); + __blk_end_request_all(rq, BLK_STS_IOERR); spin_unlock_irq(q->queue_lock); return; } diff --git a/block/blk-flush.c b/block/blk-flush.c index c4e0880b54bb..a572b47fa059 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front) */ static bool blk_flush_complete_seq(struct request *rq, struct blk_flush_queue *fq, - unsigned int seq, int error) + unsigned int seq, blk_status_t error) { struct request_queue *q = rq->q; struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; @@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq, return kicked | 
queued; } -static void flush_end_io(struct request *flush_rq, int error) +static void flush_end_io(struct request *flush_rq, blk_status_t error) { struct request_queue *q = flush_rq->q; struct list_head *running; @@ -341,7 +341,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) return blk_flush_queue_rq(flush_rq, false); } -static void flush_data_end_io(struct request *rq, int error) +static void flush_data_end_io(struct request *rq, blk_status_t error) { struct request_queue *q = rq->q; struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); @@ -382,7 +382,7 @@ static void flush_data_end_io(struct request *rq, int error) blk_run_queue_async(q); } -static void mq_flush_data_end_io(struct request *rq, int error) +static void mq_flush_data_end_io(struct request *rq, blk_status_t error) { struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-mq.c b/block/blk-mq.c index 22438d5036a3..adcc1c0dce6e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -394,7 +394,7 @@ void blk_mq_free_request(struct request *rq) } EXPORT_SYMBOL_GPL(blk_mq_free_request); -inline void __blk_mq_end_request(struct request *rq, int error) +inline void __blk_mq_end_request(struct request *rq, blk_status_t error) { blk_account_io_done(rq); @@ -409,7 +409,7 @@ inline void __blk_mq_end_request(struct request *rq, int error) } EXPORT_SYMBOL(__blk_mq_end_request); -void blk_mq_end_request(struct request *rq, int error) +void blk_mq_end_request(struct request *rq, blk_status_t error) { if (blk_update_request(rq, error, blk_rq_bytes(rq))) BUG(); @@ -988,7 +988,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) pr_err("blk-mq: bad return on queue: %d\n", ret); case BLK_MQ_RQ_QUEUE_ERROR: errors++; - blk_mq_end_request(rq, -EIO); + blk_mq_end_request(rq, BLK_STS_IOERR); break; } @@ -1433,7 +1433,7 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, if (ret == BLK_MQ_RQ_QUEUE_ERROR) { *cookie = BLK_QC_T_NONE; - blk_mq_end_request(rq, -EIO); + blk_mq_end_request(rq, BLK_STS_IOERR); return; } diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 9b91daefcd9b..c4513b23f57a 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref) struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, scsi_req(rq)->result); + blk_end_request_all(rq, BLK_STS_OK); put_device(job->dev); /* release reference for the request */ @@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q) ret = bsg_create_job(dev, req); if (ret) { scsi_req(req)->result = ret; - blk_end_request_all(req, ret); + blk_end_request_all(req, BLK_STS_OK); spin_lock_irq(q->queue_lock); continue; } diff --git a/block/bsg.c b/block/bsg.c index 40db8ff4c618..59d02dd31b0c 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -294,14 +294,14 @@ out: * async completion call-back from the block layer, when scsi/ide/whatever * calls end_that_request_last() on a request */ -static void bsg_rq_end_io(struct request *rq, int uptodate) +static void bsg_rq_end_io(struct request *rq, blk_status_t status) { struct bsg_command *bc = rq->end_io_data; struct bsg_device *bd = bc->bd; unsigned long flags; - dprintk("%s: finished rq %p bc %p, bio %p stat %d\n", - bd->name, rq, bc, bc->bio, uptodate); + dprintk("%s: finished rq %p bc %p, bio %p\n", + bd->name, rq, bc, bc->bio); bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); diff --git 
a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 26a51be77227..245a879b036e 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command, bool SuccessfulIO) { struct request *Request = Command->Request; - int Error = SuccessfulIO ? 0 : -EIO; + blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR; pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist, Command->SegmentCount, Command->DmaDirection); diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index a328f673adfe..49908c74bfcb 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1378,7 +1378,7 @@ static void redo_fd_request(void) struct amiga_floppy_struct *floppy; char *data; unsigned long flags; - int err; + blk_status_t err; next_req: rq = set_next_request(); @@ -1392,7 +1392,7 @@ next_req: next_segment: /* Here someone could investigate to be more efficient */ - for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) { + for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) { #ifdef DEBUG printk("fd: sector %ld + %d requested for %s\n", blk_rq_pos(rq), cnt, @@ -1400,7 +1400,7 @@ next_segment: #endif block = blk_rq_pos(rq) + cnt; if ((int)block > floppy->blocks) { - err = -EIO; + err = BLK_STS_IOERR; break; } @@ -1413,7 +1413,7 @@ next_segment: #endif if (get_track(drive, track) == -1) { - err = -EIO; + err = BLK_STS_IOERR; break; } @@ -1424,7 +1424,7 @@ next_segment: /* keep the drive spinning while writes are scheduled */ if (!fd_motor_on(drive)) { - err = -EIO; + err = BLK_STS_IOERR; break; } /* diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 3c606c09fd5a..5bf0c9d21fc1 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1071,7 +1071,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) do { bio = rq->bio; bok = !fastfail && !bio->bi_error; - } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size)); + } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size)); /* cf. 
http://lkml.org/lkml/2006/10/31/28 */ if (!fastfail) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index fa69ecd52cb5..92da886180aa 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0); static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0); static DEFINE_TIMER(fd_timer, check_change, 0, 0); -static void fd_end_request_cur(int err) +static void fd_end_request_cur(blk_status_t err) { if (!__blk_end_request_cur(fd_request, err)) fd_request = NULL; @@ -620,7 +620,7 @@ static void fd_error( void ) fd_request->error_count++; if (fd_request->error_count >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); - fd_end_request_cur(-EIO); + fd_end_request_cur(BLK_STS_IOERR); } else if (fd_request->error_count == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); @@ -739,7 +739,7 @@ static void do_fd_action( int drive ) } else { /* all sectors finished */ - fd_end_request_cur(0); + fd_end_request_cur(BLK_STS_OK); redo_fd_request(); return; } @@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status) } else { /* all sectors finished */ - fd_end_request_cur(0); + fd_end_request_cur(BLK_STS_OK); redo_fd_request(); } return; @@ -1445,7 +1445,7 @@ repeat: if (!UD.connected) { /* drive not connected */ printk(KERN_ERR "Unknown Device: fd%d\n", drive ); - fd_end_request_cur(-EIO); + fd_end_request_cur(BLK_STS_IOERR); goto repeat; } @@ -1461,12 +1461,12 @@ repeat: /* user supplied disk type */ if (--type >= NUM_DISK_MINORS) { printk(KERN_WARNING "fd%d: invalid disk format", drive ); - fd_end_request_cur(-EIO); + fd_end_request_cur(BLK_STS_IOERR); goto repeat; } if (minor2disktype[type].drive_types > DriveType) { printk(KERN_WARNING "fd%d: unsupported disk format", drive ); - fd_end_request_cur(-EIO); + fd_end_request_cur(BLK_STS_IOERR); goto repeat; } type = minor2disktype[type].index; @@ -1476,7 +1476,7 @@ repeat: } if (blk_rq_pos(fd_request) + 1 > UDT->blocks) { - fd_end_request_cur(-EIO); + fd_end_request_cur(BLK_STS_IOERR); goto repeat; } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 3761066fe89d..02a611993bb4 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq) /* set the residual count for pc requests */ if (blk_rq_is_passthrough(rq)) scsi_req(rq)->resid_len = c->err_info->ResidualCnt; - blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0); + blk_end_request_all(rq, scsi_req(rq)->result ? 
+ BLK_STS_IOERR : BLK_STS_OK); spin_lock_irqsave(&h->lock, flags); cmd_free(h, c); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 60d4c7653178..cc75a5176057 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req) * ============================= */ -static void floppy_end_request(struct request *req, int error) +static void floppy_end_request(struct request *req, blk_status_t error) { unsigned int nr_sectors = current_count_sectors; unsigned int drive = (unsigned long)req->rq_disk->private_data; @@ -2263,7 +2263,7 @@ static void request_done(int uptodate) DRWE->last_error_generation = DRS->generation; } spin_lock_irqsave(q->queue_lock, flags); - floppy_end_request(req, -EIO); + floppy_end_request(req, BLK_STS_IOERR); spin_unlock_irqrestore(q->queue_lock, flags); } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e288fb30100f..4caf6338c012 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -464,7 +464,7 @@ static void lo_complete_rq(struct request *rq) zero_fill_bio(bio); } - blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0); + blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK); } static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3a779a4f5653..ee6f66bb50c7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, struct smart_attr *attrib); -static void mtip_complete_command(struct mtip_cmd *cmd, int status) +static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status) { struct request *req = blk_mq_rq_from_pdu(cmd); @@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd) if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); - mtip_complete_command(cmd, -EIO); + mtip_complete_command(cmd, BLK_STS_IOERR); return; } @@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd) tag, fail_reason != NULL ? fail_reason : "unknown"); - mtip_complete_command(cmd, -ENODATA); + mtip_complete_command(cmd, BLK_STS_MEDIUM); continue; } } @@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd) dev_warn(&port->dd->pdev->dev, "retiring tag %d\n", tag); - mtip_complete_command(cmd, -EIO); + mtip_complete_command(cmd, BLK_STS_IOERR); } } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); @@ -2753,7 +2753,7 @@ static void mtip_abort_cmd(struct request *req, void *data, dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); clear_bit(req->tag, dd->port->cmds_to_issue); - cmd->status = -EIO; + cmd->status = BLK_STS_IOERR; mtip_softirq_done_fn(req); } @@ -3597,7 +3597,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) int err; err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); - blk_mq_end_request(rq, err); + blk_mq_end_request(rq, err ? 
BLK_STS_IOERR : BLK_STS_OK); return 0; } @@ -3730,7 +3730,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, if (reserved) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); - cmd->status = -ETIME; + cmd->status = BLK_STS_TIMEOUT; return BLK_EH_HANDLED; } @@ -3961,7 +3961,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->status = -ENODEV; + cmd->status = BLK_STS_IOERR; blk_mq_complete_request(rq); } diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 37b8e3e0bb78..e8286af50e16 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -342,7 +342,7 @@ struct mtip_cmd { int retries; /* The number of retries left for this command. */ int direction; /* Data transfer direction */ - int status; + blk_status_t status; }; /* Structure used to describe a port. */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6de9f9943a0e..978d2d2d08d6 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -116,7 +116,7 @@ struct nbd_cmd { int index; int cookie; struct completion send_complete; - int status; + blk_status_t status; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, struct nbd_config *config; if (!refcount_inc_not_zero(&nbd->config_refs)) { - cmd->status = -EIO; + cmd->status = BLK_STS_TIMEOUT; return BLK_EH_HANDLED; } @@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, "Connection timed out\n"); } set_bit(NBD_TIMEDOUT, &config->runtime_flags); - cmd->status = -EIO; + cmd->status = BLK_STS_IOERR; sock_shutdown(nbd); nbd_config_put(nbd); @@ -578,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); - cmd->status = -EIO; + cmd->status = BLK_STS_IOERR; return cmd; } @@ -603,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) */ if (nbd_disconnected(config) || config->num_connections <= 1) { - cmd->status = -EIO; + cmd->status = BLK_STS_IOERR; return cmd; } return ERR_PTR(-EIO); @@ -655,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) if (!blk_mq_request_started(req)) return; cmd = blk_mq_rq_to_pdu(req); - cmd->status = -EIO; + cmd->status = BLK_STS_IOERR; blk_mq_complete_request(req); } @@ -744,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) nbd_config_put(nbd); return -EINVAL; } - cmd->status = 0; + cmd->status = BLK_STS_OK; again: nsock = config->socks[index]; mutex_lock(&nsock->tx_lock); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index d946e1eeac8e..e6b81d370882 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -229,11 +229,11 @@ static void end_cmd(struct nullb_cmd *cmd) switch (queue_mode) { case NULL_Q_MQ: - blk_mq_end_request(cmd->rq, 0); + blk_mq_end_request(cmd->rq, BLK_STS_OK); return; case NULL_Q_RQ: INIT_LIST_HEAD(&cmd->rq->queuelist); - blk_end_request_all(cmd->rq, 0); + blk_end_request_all(cmd->rq, BLK_STS_OK); break; case NULL_Q_BIO: bio_endio(cmd->bio); @@ -422,11 +422,12 @@ static void cleanup_queues(struct nullb *nullb) #ifdef CONFIG_NVM -static void null_lnvm_end_io(struct request *rq, int error) +static void null_lnvm_end_io(struct request *rq, blk_status_t status) { struct nvm_rq *rqd = rq->end_io_data; - 
rqd->error = error; + /* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */ + rqd->error = status ? -EIO : 0; nvm_end_io(rqd); blk_put_request(rq); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index b1267ef34d5a..cffe42d80ce9 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -783,7 +783,7 @@ static void pcd_request(void) ps_set_intr(do_pcd_read, NULL, 0, nice); return; } else { - __blk_end_request_all(pcd_req, -EIO); + __blk_end_request_all(pcd_req, BLK_STS_IOERR); pcd_req = NULL; } } @@ -794,7 +794,7 @@ static void do_pcd_request(struct request_queue *q) pcd_request(); } -static inline void next_request(int err) +static inline void next_request(blk_status_t err) { unsigned long saved_flags; @@ -837,7 +837,7 @@ static void pcd_start(void) if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) { pcd_bufblk = -1; - next_request(-EIO); + next_request(BLK_STS_IOERR); return; } @@ -871,7 +871,7 @@ static void do_pcd_read_drq(void) return; } pcd_bufblk = -1; - next_request(-EIO); + next_request(BLK_STS_IOERR); return; } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 7d2402f90978..c98983be4f9c 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -438,7 +438,7 @@ static void run_fsm(void) phase = NULL; spin_lock_irqsave(&pd_lock, saved_flags); if (!__blk_end_request_cur(pd_req, - res == Ok ? 0 : -EIO)) { + res == Ok ? 0 : BLK_STS_IOERR)) { if (!set_next_request()) stop = 1; } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index f24ca7315ddc..5f46da8d05cd 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -801,7 +801,7 @@ static int set_next_request(void) return pf_req != NULL; } -static void pf_end_request(int err) +static void pf_end_request(blk_status_t err) { if (pf_req && !__blk_end_request_cur(pf_req, err)) pf_req = NULL; @@ -821,7 +821,7 @@ repeat: pf_count = blk_rq_cur_sectors(pf_req); if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) { - pf_end_request(-EIO); + pf_end_request(BLK_STS_IOERR); goto repeat; } @@ -836,7 +836,7 @@ repeat: pi_do_claimed(pf_current->pi, do_pf_write); else { pf_busy = 0; - pf_end_request(-EIO); + pf_end_request(BLK_STS_IOERR); goto repeat; } } @@ -868,7 +868,7 @@ static int pf_next_buf(void) return 0; } -static inline void next_request(int err) +static inline void next_request(blk_status_t err) { unsigned long saved_flags; @@ -896,7 +896,7 @@ static void do_pf_read_start(void) pi_do_claimed(pf_current->pi, do_pf_read_start); return; } - next_request(-EIO); + next_request(BLK_STS_IOERR); return; } pf_mask = STAT_DRQ; @@ -915,7 +915,7 @@ static void do_pf_read_drq(void) pi_do_claimed(pf_current->pi, do_pf_read_start); return; } - next_request(-EIO); + next_request(BLK_STS_IOERR); return; } pi_read_block(pf_current->pi, pf_buf, 512); @@ -942,7 +942,7 @@ static void do_pf_write_start(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(-EIO); + next_request(BLK_STS_IOERR); return; } @@ -955,7 +955,7 @@ static void do_pf_write_start(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(-EIO); + next_request(BLK_STS_IOERR); return; } pi_write_block(pf_current->pi, pf_buf, 512); @@ -975,7 +975,7 @@ static void do_pf_write_done(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(-EIO); + next_request(BLK_STS_IOERR); return; } pi_disconnect(pf_current->pi); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 
a809e3e9feb8..075662f2cf46 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, __LINE__, op, res); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); return 0; } @@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", __func__, __LINE__, res); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); return 0; } @@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, break; default: blk_dump_rq_flags(req, DEVICE_NAME " bad request"); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); } } } @@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) struct ps3_storage_device *dev = data; struct ps3disk_private *priv; struct request *req; - int res, read, error; + int res, read; + blk_status_t error; u64 tag, status; const char *op; @@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) if (status) { dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, __LINE__, op, status); - error = -EIO; + error = BLK_STS_IOERR; } else { dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__, __LINE__, op); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 454bf9c34882..3e8b43d792c2 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2293,11 +2293,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) rbd_assert(img_request->obj_request != NULL); more = obj_request->which < img_request->obj_request_count - 1; } else { + blk_status_t status = errno_to_blk_status(result); + rbd_assert(img_request->rq != NULL); - more = blk_update_request(img_request->rq, result, xferred); + more = blk_update_request(img_request->rq, status, xferred); if (!more) - __blk_mq_end_request(img_request->rq, result); + __blk_mq_end_request(img_request->rq, status); } return more; @@ -4149,7 +4151,7 @@ err_rq: obj_op_name(op_type), length, offset, result); ceph_put_snap_context(snapc); err: - blk_mq_end_request(rq, result); + blk_mq_end_request(rq, errno_to_blk_status(result)); } static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 27833e4dae2a..e6c526861703 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev, struct skd_special_context *skspcl); static void skd_request_fn(struct request_queue *rq); static void skd_end_request(struct skd_device *skdev, - struct skd_request_context *skreq, int error); -static int skd_preop_sg_list(struct skd_device *skdev, + struct skd_request_context *skreq, blk_status_t status); +static bool skd_preop_sg_list(struct skd_device *skdev, struct skd_request_context *skreq); static void skd_postop_sg_list(struct skd_device *skdev, struct skd_request_context *skreq); @@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev) if (req == NULL) break; blk_start_request(req); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); } } @@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q) struct request *req = NULL; struct skd_scsi_request *scsi_req; unsigned long io_flags; - int error; u32 lba; u32 
count; int data_dir; @@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q) if (!req->bio) goto skip_sg; - error = skd_preop_sg_list(skdev, skreq); - - if (error != 0) { + if (!skd_preop_sg_list(skdev, skreq)) { /* * Complete the native request with error. * Note that the request context is still at the @@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q) */ pr_debug("%s:%s:%d error Out\n", skdev->name, __func__, __LINE__); - skd_end_request(skdev, skreq, error); + skd_end_request(skdev, skreq, BLK_STS_RESOURCE); continue; } @@ -805,7 +802,7 @@ skip_sg: } static void skd_end_request(struct skd_device *skdev, - struct skd_request_context *skreq, int error) + struct skd_request_context *skreq, blk_status_t error) { if (unlikely(error)) { struct request *req = skreq->req; @@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev, __blk_end_request_all(skreq->req, error); } -static int skd_preop_sg_list(struct skd_device *skdev, +static bool skd_preop_sg_list(struct skd_device *skdev, struct skd_request_context *skreq) { struct request *req = skreq->req; @@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev, n_sg = blk_rq_map_sg(skdev->queue, req, sg); if (n_sg <= 0) - return -EINVAL; + return false; /* * Map scatterlist to PCI bus addresses. @@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev, */ n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir); if (n_sg <= 0) - return -EINVAL; + return false; SKD_ASSERT(n_sg <= skdev->sgs_per_request); @@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev, } } - return 0; + return true; } static void skd_postop_sg_list(struct skd_device *skdev, @@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) { case SKD_CHECK_STATUS_REPORT_GOOD: case SKD_CHECK_STATUS_REPORT_SMART_ALERT: - skd_end_request(skdev, skreq, 0); + skd_end_request(skdev, skreq, BLK_STS_OK); break; case SKD_CHECK_STATUS_BUSY_IMMINENT: @@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_REPORT_ERROR: default: - skd_end_request(skdev, skreq, -EIO); + skd_end_request(skdev, skreq, BLK_STS_IOERR); break; } } @@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev, * native request. */ if (likely(cmp_status == SAM_STAT_GOOD)) - skd_end_request(skdev, skreq, 0); + skd_end_request(skdev, skreq, BLK_STS_OK); else skd_resolve_req_exception(skdev, skreq); } @@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue) SKD_MAX_RETRIES) blk_requeue_request(skdev->queue, skreq->req); else - skd_end_request(skdev, skreq, -EIO); + skd_end_request(skdev, skreq, BLK_STS_IOERR); skreq->req = NULL; diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 3f3a3ab3d50a..6b16ead1da58 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, rqe->req = NULL; - __blk_end_request(req, (desc->status ? -EIO : 0), desc->size); + __blk_end_request(req, (desc->status ? 
BLK_STS_IOERR : 0), desc->size); vdc_blk_queue_start(port); } @@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port) struct request *req; while ((req = blk_fetch_request(port->disk->queue)) != NULL) - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); } static void vdc_ldc_reset_timer(unsigned long _arg) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 3064be6cf375..1633aaf24060 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs, return ret; } -static int floppy_read_sectors(struct floppy_state *fs, +static blk_status_t floppy_read_sectors(struct floppy_state *fs, int req_sector, int sectors_nb, unsigned char *buffer) { @@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs, ret = swim_read_sector(fs, side, track, sector, buffer); if (try-- == 0) - return -EIO; + return BLK_STS_IOERR; } while (ret != 512); buffer += ret; @@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q) req = swim_next_request(swd); while (req) { - int err = -EIO; + blk_status_t err = BLK_STS_IOERR; fs = req->rq_disk->private_data; if (blk_rq_pos(req) >= fs->total_secs) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index ba4809c9bdba..c7953860ce91 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk, unsigned int clearing); static int floppy_revalidate(struct gendisk *disk); -static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes) +static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes) { struct request *req = fs->cur_req; int rc; @@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs) if (fs->mdev->media_bay && check_media_bay(fs->mdev->media_bay) != MB_FD) { swim3_dbg("%s", " media bay absent, dropping req\n"); - swim3_end_request(fs, -ENODEV, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); continue; } @@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs) if (blk_rq_pos(req) >= fs->total_secs) { swim3_dbg(" pos out of bounds (%ld, max is %ld)\n", (long)blk_rq_pos(req), (long)fs->total_secs); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); continue; } if (fs->ejected) { swim3_dbg("%s", " disk ejected\n"); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); continue; } @@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs) fs->write_prot = swim3_readbit(fs, WRITE_PROT); if (fs->write_prot) { swim3_dbg("%s", " try to write, disk write protected\n"); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); continue; } } @@ -548,7 +548,7 @@ static void act(struct floppy_state *fs) if (fs->retries > 5) { swim3_err("Wrong cylinder in transfer, want: %d got %d\n", fs->req_cyl, fs->cur_cyl); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; return; } @@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data) out_8(&sw->intr_enable, 0); fs->cur_cyl = -1; if (fs->retries > 5) { - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; start_request(fs); } else { @@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data) out_8(&sw->select, RELAX); out_8(&sw->intr_enable, 0); swim3_err("%s", "Seek timeout\n"); - swim3_end_request(fs, -EIO, 0); + 
swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); @@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data) goto unlock; } swim3_err("%s", "Seek settle timeout\n"); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; start_request(fs); unlock: @@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data) swim3_err("Timeout %sing sector %ld\n", (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"), (long)blk_rq_pos(fs->cur_req)); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); @@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) swim3_err("%s", "Seen sector but cyl=ff?\n"); fs->cur_cyl = -1; if (fs->retries > 5) { - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; start_request(fs); } else { @@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) swim3_err("Error %sing block %ld (err=%x)\n", rq_data_dir(req) == WRITE? "writ": "read", (long)blk_rq_pos(req), err); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; } } else { @@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid); swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n", fs->state, rq_data_dir(req), intr, err); - swim3_end_request(fs, -EIO, 0); + swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; start_request(fs); break; diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index c8e072caf56f..08586dc14e85 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host, static inline void carm_end_request_queued(struct carm_host *host, struct carm_request *crq, - int error) + blk_status_t error) { struct request *req = crq->rq; int rc; @@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host) } static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, - int error) + blk_status_t error) { carm_end_request_queued(host, crq, error); if (max_queue == 1) @@ -869,14 +869,14 @@ queue_one_request: sg = &crq->sg[0]; n_elem = blk_rq_map_sg(q, rq, sg); if (n_elem <= 0) { - carm_end_rq(host, crq, -EIO); + carm_end_rq(host, crq, BLK_STS_IOERR); return; /* request with no s/g entries? */ } /* map scatterlist to PCI bus addresses */ n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir); if (n_elem <= 0) { - carm_end_rq(host, crq, -EIO); + carm_end_rq(host, crq, BLK_STS_IOERR); return; /* request with no s/g entries? 
*/ } crq->n_elem = n_elem; @@ -937,7 +937,7 @@ queue_one_request: static void carm_handle_array_info(struct carm_host *host, struct carm_request *crq, u8 *mem, - int error) + blk_status_t error) { struct carm_port *port; u8 *msg_data = mem + sizeof(struct carm_array_info); @@ -997,7 +997,7 @@ out: static void carm_handle_scan_chan(struct carm_host *host, struct carm_request *crq, u8 *mem, - int error) + blk_status_t error) { u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET; unsigned int i, dev_count = 0; @@ -1029,7 +1029,7 @@ out: } static void carm_handle_generic(struct carm_host *host, - struct carm_request *crq, int error, + struct carm_request *crq, blk_status_t error, int cur_state, int next_state) { DPRINTK("ENTER\n"); @@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host, } static inline void carm_handle_rw(struct carm_host *host, - struct carm_request *crq, int error) + struct carm_request *crq, blk_status_t error) { int pci_dir; @@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host, u32 handle = le32_to_cpu(ret_handle_le); unsigned int msg_idx; struct carm_request *crq; - int error = (status == RMSG_OK) ? 0 : -EIO; + blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR; u8 *mem; VPRINTK("ENTER, handle == 0x%x\n", handle); @@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host, err_out: printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n", pci_name(host->pdev), crq->msg_type, crq->msg_subtype); - carm_end_rq(host, crq, -EIO); + carm_end_rq(host, crq, BLK_STS_IOERR); } static inline void carm_handle_responses(struct carm_host *host) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 553cc4c542b4..205b74d70efc 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -64,15 +64,15 @@ struct virtblk_req { struct scatterlist sg[]; }; -static inline int virtblk_result(struct virtblk_req *vbr) +static inline blk_status_t virtblk_result(struct virtblk_req *vbr) { switch (vbr->status) { case VIRTIO_BLK_S_OK: - return 0; + return BLK_STS_OK; case VIRTIO_BLK_S_UNSUPP: - return -ENOTTY; + return BLK_STS_NOTSUPP; default: - return -EIO; + return BLK_STS_IOERR; } } @@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) goto out; blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); - err = virtblk_result(blk_mq_rq_to_pdu(req)); + err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); out: blk_put_request(req); return err; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 39459631667c..aedc3c759273 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1601,14 +1601,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) continue; } - blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 
0 : -EIO; + if (bret->status == BLKIF_RSP_OKAY) + blkif_req(req)->error = BLK_STS_OK; + else + blkif_req(req)->error = BLK_STS_IOERR; + switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - blkif_req(req)->error = -EOPNOTSUPP; + blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); @@ -1626,11 +1630,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) rinfo->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - blkif_req(req)->error = -EOPNOTSUPP; + blkif_req(req)->error = BLK_STS_NOTSUPP; } if (unlikely(blkif_req(req)->error)) { - if (blkif_req(req)->error == -EOPNOTSUPP) - blkif_req(req)->error = 0; + if (blkif_req(req)->error == BLK_STS_NOTSUPP) + blkif_req(req)->error = BLK_STS_OK; info->feature_fua = 0; info->feature_flush = 0; xlvbd_flush(info); @@ -2137,7 +2141,7 @@ static int blkfront_resume(struct xenbus_device *dev) merge_bio.tail = shadow[j].request->biotail; bio_list_merge(&info->bio_list, &merge_bio); shadow[j].request->bio = NULL; - blk_mq_end_request(shadow[j].request, 0); + blk_mq_end_request(shadow[j].request, BLK_STS_OK); } } diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 757dce2147e0..977fdf066017 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q) if (!blk_rq_is_passthrough(req)) break; blk_start_request(req); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); } return req; } @@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Drop all in-flight and pending requests */ if (ace->req) { - __blk_end_request_all(ace->req, -EIO); + __blk_end_request_all(ace->req, BLK_STS_IOERR); ace->req = NULL; } while ((req = blk_fetch_request(ace->queue)) != NULL) - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); /* Drop back to IDLE state and notify waiters */ ace->fsm_state = ACE_FSM_STATE_IDLE; @@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace) } /* bio finished; is there another one? 
*/ - if (__blk_end_request_cur(ace->req, 0)) { + if (__blk_end_request_cur(ace->req, BLK_STS_OK)) { /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", * blk_rq_sectors(ace->req), * blk_rq_cur_sectors(ace->req)); diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 968f9e52effa..41c95c9b2ab4 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q) while (req) { unsigned long start = blk_rq_pos(req) << 9; unsigned long len = blk_rq_cur_bytes(req); - int err = 0; + blk_status_t err = BLK_STS_OK; if (start + len > z2ram_size) { pr_err(DEVICE_NAME ": bad access: block=%llu, " "count=%u\n", (unsigned long long)blk_rq_pos(req), blk_rq_cur_sectors(req)); - err = -EIO; + err = BLK_STS_IOERR; goto done; } while (len) { diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 1372763a948f..53f8278e66f7 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void) */ static void gdrom_readdisk_dma(struct work_struct *work) { - int err, block, block_cnt; + int block, block_cnt; + blk_status_t err; struct packet_command *read_command; struct list_head *elem, *next; struct request *req; @@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work) __raw_writeb(1, GDROM_DMA_STATUS_REG); wait_event_interruptible_timeout(request_queue, gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); - err = gd.transfer ? -EIO : 0; + err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK; gd.transfer = 0; gd.pending = 0; /* now seek to take the request spinlock @@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq) break; case REQ_OP_WRITE: pr_notice("Read only device - write request ignored\n"); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); break; default: printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); break; } } diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 5901937284e7..d7a49dcfa85e 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -273,7 +273,7 @@ void ide_retry_pc(ide_drive_t *drive) ide_requeue_and_plug(drive, failed_rq); if (ide_queue_sense_rq(drive, pc)) { blk_start_request(failed_rq); - ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq)); + ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq)); } } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -437,7 +437,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) /* No more interrupts */ if ((stat & ATA_DRQ) == 0) { - int uptodate, error; + int uptodate; + blk_status_t error; debug_log("Packet command completed, %d bytes transferred\n", blk_rq_bytes(rq)); @@ -490,7 +491,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (ata_misc_request(rq)) { scsi_req(rq)->result = 0; - error = 0; + error = BLK_STS_OK; } else { if (blk_rq_is_passthrough(rq) && uptodate <= 0) { @@ -498,7 +499,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) scsi_req(rq)->result = -EIO; } - error = uptodate ? 0 : -EIO; + error = uptodate ? 
BLK_STS_OK : BLK_STS_IOERR; } ide_complete_rq(drive, error, blk_rq_bytes(rq)); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 07e5ff3a64c3..d55e44ed82b5 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) scsi_req(failed)->sense_len = scsi_req(rq)->sense_len; cdrom_analyze_sense_data(drive, failed); - if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed))) + if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed))) BUG(); } else cdrom_analyze_sense_data(drive, NULL); @@ -508,7 +508,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) nr_bytes -= cmd->last_xfer_len; if (nr_bytes > 0) { - ide_complete_rq(drive, 0, nr_bytes); + ide_complete_rq(drive, BLK_STS_OK, nr_bytes); return true; } @@ -674,7 +674,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) out_end: if (blk_rq_is_scsi(rq) && rc == 0) { scsi_req(rq)->resid_len = 0; - blk_end_request_all(rq, 0); + blk_end_request_all(rq, BLK_STS_OK); hwif->rq = NULL; } else { if (sense && uptodate) @@ -699,7 +699,7 @@ out_end: scsi_req(rq)->resid_len += cmd->last_xfer_len; } - ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq)); + ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq)); if (sense && rc == 2) ide_error(drive, "request sense failure", stat); @@ -844,7 +844,7 @@ out_end: if (nsectors == 0) nsectors = 1; - ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9); + ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9); return ide_stopped; } diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 51c81223e56d..54d4d78ca46a 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive) if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) ide_finish_cmd(drive, cmd, stat); else - ide_complete_rq(drive, 0, + ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9); return ide_stopped; } diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 4b7ffd7d158d..47d5f3379748 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) return ide_stopped; } scsi_req(rq)->result = err; - ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); + ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq)); return ide_stopped; } @@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) } EXPORT_SYMBOL_GPL(ide_error); -static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) +static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err) { struct request *rq = drive->hwif->rq; @@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) { if (err <= 0 && scsi_req(rq)->result == 0) scsi_req(rq)->result = -EIO; - ide_complete_rq(drive, err ? 
err : 0, blk_rq_bytes(rq)); + ide_complete_rq(drive, err, blk_rq_bytes(rq)); } } @@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive) } /* done polling */ hwif->polling = 0; - ide_complete_drive_reset(drive, 0); + ide_complete_drive_reset(drive, BLK_STS_OK); return ide_stopped; } @@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; const struct ide_port_ops *port_ops = hwif->port_ops; u8 tmp; - int err = 0; + blk_status_t err = BLK_STS_OK; if (port_ops && port_ops->reset_poll) { err = port_ops->reset_poll(drive); @@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive) printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n", hwif->name, tmp); drive->failures++; - err = -EIO; + err = BLK_STS_IOERR; } else { tmp = ide_read_error(drive); @@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive) } else { ide_reset_report_error(hwif, tmp); drive->failures++; - err = -EIO; + err = BLK_STS_IOERR; } } out: @@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi) if (io_ports->ctl_addr == 0) { spin_unlock_irqrestore(&hwif->lock, flags); - ide_complete_drive_reset(drive, -ENXIO); + ide_complete_drive_reset(drive, BLK_STS_IOERR); return ide_stopped; } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 8ac6048cd2df..627b1f62a749 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive, drive->failed_pc = NULL; drive->pc_callback(drive, 0); - ide_complete_rq(drive, -EIO, done); + ide_complete_rq(drive, BLK_STS_IOERR, done); return ide_stopped; } @@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, if (ata_misc_request(rq)) { scsi_req(rq)->result = 0; - ide_complete_rq(drive, 0, blk_rq_bytes(rq)); + ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq)); return ide_stopped; } else goto out_end; @@ -303,7 +303,7 @@ out_end: drive->failed_pc = NULL; if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0) scsi_req(rq)->result = -EIO; - ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); + ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 323af721f8cb..3a234701d92c 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -54,7 +54,7 @@ #include #include -int ide_end_rq(ide_drive_t *drive, struct request *rq, int error, +int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error, unsigned int nr_bytes) { /* @@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) } } -int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) +int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes) { ide_hwif_t *hwif = drive->hwif; struct request *rq = hwif->rq; @@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) * if failfast is set on a request, override number of sectors * and complete the whole request right now */ - if (blk_noretry_request(rq) && error <= 0) + if (blk_noretry_request(rq) && error) nr_bytes = blk_rq_sectors(rq) << 9; rc = ide_end_rq(drive, rq, error, nr_bytes); @@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq) scsi_req(rq)->result = -EIO; } - ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); + ide_complete_rq(drive, BLK_STS_IOERR, 
blk_rq_bytes(rq)); } static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) @@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, printk("%s: DRIVE_CMD (null)\n", drive->name); #endif scsi_req(rq)->result = 0; - ide_complete_rq(drive, 0, blk_rq_bytes(rq)); + ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 0977fc1f40ce..08b54bb3b705 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -40,7 +40,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) return ret; } -static void ide_end_sync_rq(struct request *rq, int error) +static void ide_end_sync_rq(struct request *rq, blk_status_t error) { complete(rq->end_io_data); } @@ -57,7 +57,7 @@ static int ide_pm_execute_rq(struct request *rq) if (unlikely(blk_queue_dying(q))) { rq->rq_flags |= RQF_QUIET; scsi_req(rq)->result = -ENXIO; - __blk_end_request_all(rq, 0); + __blk_end_request_all(rq, BLK_STS_OK); spin_unlock_irq(q->queue_lock); return -ENXIO; } @@ -235,7 +235,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) drive->hwif->rq = NULL; - if (blk_end_request(rq, 0, 0)) + if (blk_end_request(rq, BLK_STS_OK, 0)) BUG(); } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index a0651f948b76..4d062c568777 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, drive->failed_pc = NULL; drive->pc_callback(drive, 0); - ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); + ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq)); return ide_stopped; } ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries, diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index d71199d23c9e..ab1a32cdcb0a 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) } if (nr_bytes > 0) - ide_complete_rq(drive, 0, nr_bytes); + ide_complete_rq(drive, BLK_STS_OK, nr_bytes); } } @@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat) ide_driveid_update(drive); } - ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); + ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq)); } /* @@ -394,7 +394,7 @@ out_end: if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) ide_finish_cmd(drive, cmd, stat); else - ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9); + ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9); return ide_stopped; out_err: ide_error_cmd(drive, cmd); diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index 6a1849bb476c..57eea5a9047f 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive) * yet. 
*/ -static int sil_sata_reset_poll(ide_drive_t *drive) +static blk_status_t sil_sata_reset_poll(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; void __iomem *sata_status_addr @@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive) if ((sata_stat & 0x03) != 0x03) { printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n", hwif->name, sata_stat); - return -ENXIO; + return BLK_STS_IOERR; } } - return 0; + return BLK_STS_OK; } /** diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ceeeb495d01c..39262e344ae1 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1449,22 +1449,15 @@ static void activate_path_work(struct work_struct *work) activate_or_offline_path(pgpath); } -static int noretry_error(int error) +static int noretry_error(blk_status_t error) { switch (error) { - case -EBADE: - /* - * EBADE signals an reservation conflict. - * We shouldn't fail the path here as we can communicate with - * the target. We should failover to the next path, but in - * doing so we might be causing a ping-pong between paths. - * So just return the reservation conflict error. - */ - case -EOPNOTSUPP: - case -EREMOTEIO: - case -EILSEQ: - case -ENODATA: - case -ENOSPC: + case BLK_STS_NOTSUPP: + case BLK_STS_NOSPC: + case BLK_STS_TARGET: + case BLK_STS_NEXUS: + case BLK_STS_MEDIUM: + case BLK_STS_RESOURCE: return 1; } @@ -1473,7 +1466,7 @@ static int noretry_error(int error) } static int multipath_end_io(struct dm_target *ti, struct request *clone, - int error, union map_info *map_context) + blk_status_t error, union map_info *map_context) { struct dm_mpath_io *mpio = get_mpio(map_context); struct pgpath *pgpath = mpio->pgpath; @@ -1500,7 +1493,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (error == -EIO) + if (error == BLK_STS_IOERR) dm_report_EIO(m); /* complete with the original error */ r = DM_ENDIO_DONE; @@ -1525,7 +1518,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *er unsigned long flags; int r = DM_ENDIO_DONE; - if (!*error || noretry_error(*error)) + if (!*error || noretry_error(errno_to_blk_status(*error))) goto done; if (pgpath) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index b639fa7246ee..bee334389173 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone) struct dm_rq_target_io *tio = info->tio; struct bio *bio = info->orig; unsigned int nr_bytes = info->orig->bi_iter.bi_size; - int error = clone->bi_error; + blk_status_t error = errno_to_blk_status(clone->bi_error); bio_put(clone); @@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone) * Do not use blk_end_request() here, because it may complete * the original request before the clone, and break the ordering. */ - blk_update_request(tio->orig, 0, nr_bytes); + blk_update_request(tio->orig, BLK_STS_OK, nr_bytes); } static struct dm_rq_target_io *tio_from_request(struct request *rq) @@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * Must be called without clone's queue lock held, * see end_clone_request() for more details. 
*/ -static void dm_end_request(struct request *clone, int error) +static void dm_end_request(struct request *clone, blk_status_t error) { int rw = rq_data_dir(clone); struct dm_rq_target_io *tio = clone->end_io_data; @@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ rq_completed(md, rw, false); } -static void dm_done(struct request *clone, int error, bool mapped) +static void dm_done(struct request *clone, blk_status_t error, bool mapped) { int r = DM_ENDIO_DONE; struct dm_rq_target_io *tio = clone->end_io_data; @@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped) r = rq_end_io(tio->ti, clone, error, &tio->info); } - if (unlikely(error == -EREMOTEIO)) { + if (unlikely(error == BLK_STS_TARGET)) { if (req_op(clone) == REQ_OP_WRITE_SAME && !clone->q->limits.max_write_same_sectors) disable_write_same(tio->md); @@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq) * Complete the clone and the original request with the error status * through softirq context. */ -static void dm_complete_request(struct request *rq, int error) +static void dm_complete_request(struct request *rq, blk_status_t error) { struct dm_rq_target_io *tio = tio_from_request(rq); @@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error) * Target's rq_end_io() function isn't called. * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. */ -static void dm_kill_unmapped_request(struct request *rq, int error) +static void dm_kill_unmapped_request(struct request *rq, blk_status_t error) { rq->rq_flags |= RQF_FAILED; dm_complete_request(rq, error); @@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error) /* * Called with the clone's queue lock held (in the case of .request_fn) */ -static void end_clone_request(struct request *clone, int error) +static void end_clone_request(struct request *clone, blk_status_t error) { struct dm_rq_target_io *tio = clone->end_io_data; @@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error) static void dm_dispatch_clone_request(struct request *clone, struct request *rq) { - int r; + blk_status_t r; if (blk_queue_io_stat(clone->q)) clone->rq_flags |= RQF_IO_STAT; @@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio) break; case DM_MAPIO_KILL: /* The target wants to complete the I/O */ - dm_kill_unmapped_request(rq, -EIO); + dm_kill_unmapped_request(rq, BLK_STS_IOERR); break; default: DMWARN("unimplemented target map return value: %d", r); diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index f0020d21b95f..9813922e4fe5 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -24,7 +24,7 @@ struct dm_rq_target_io { struct dm_target *ti; struct request *orig, *clone; struct kthread_work work; - int error; + blk_status_t error; union map_info info; struct dm_stats_aux stats_aux; unsigned long duration_jiffies; diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 99e651c27fb7..22de7f5ed032 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work) spin_lock_irqsave(&msb->q_lock, flags); if (len) - if (!__blk_end_request(msb->req, 0, len)) + if (!__blk_end_request(msb->req, BLK_STS_OK, len)) msb->req = NULL; if (error && msb->req) { + blk_status_t ret = errno_to_blk_status(error); dbg_verbose("IO: ending one sector of the request with 
error"); - if (!__blk_end_request(msb->req, error, msb->page_size)) + if (!__blk_end_request(msb->req, ret, msb->page_size)) msb->req = NULL; } @@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q) WARN_ON(!msb->io_queue_stopped); while ((req = blk_fetch_request(q)) != NULL) - __blk_end_request_all(req, -ENODEV); + __blk_end_request_all(req, BLK_STS_IOERR); return; } diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c00d8a266878..8897962781bb 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -709,7 +709,8 @@ try_again: msb->req_sg); if (!msb->seg_count) { - chunk = __blk_end_request_cur(msb->block_req, -ENOMEM); + chunk = __blk_end_request_cur(msb->block_req, + BLK_STS_RESOURCE); continue; } @@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error) if (error && !t_len) t_len = blk_rq_cur_bytes(msb->block_req); - chunk = __blk_end_request(msb->block_req, error, t_len); + chunk = __blk_end_request(msb->block_req, + errno_to_blk_status(error), t_len); error = mspro_block_issue_req(card, chunk); @@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q) if (msb->eject) { while ((req = blk_fetch_request(q)) != NULL) - __blk_end_request_all(req, -ENODEV); + __blk_end_request_all(req, BLK_STS_IOERR); return; } diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 8273b078686d..6ff94a948a4b 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) struct mmc_card *card = md->queue.card; unsigned int from, nr, arg; int err = 0, type = MMC_BLK_DISCARD; + blk_status_t status = BLK_STS_OK; if (!mmc_can_erase(card)) { - err = -EOPNOTSUPP; + status = BLK_STS_NOTSUPP; goto fail; } @@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) if (!err) err = mmc_erase(card, from, nr, arg); } while (err == -EIO && !mmc_blk_reset(md, card->host, type)); - if (!err) + if (err) + status = BLK_STS_IOERR; + else mmc_blk_reset_success(md, type); fail: - blk_end_request(req, err, blk_rq_bytes(req)); + blk_end_request(req, status, blk_rq_bytes(req)); } static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, @@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, struct mmc_card *card = md->queue.card; unsigned int from, nr, arg; int err = 0, type = MMC_BLK_SECDISCARD; + blk_status_t status = BLK_STS_OK; if (!(mmc_can_secure_erase_trim(card))) { - err = -EOPNOTSUPP; + status = BLK_STS_NOTSUPP; goto out; } @@ -1254,8 +1258,10 @@ retry: err = mmc_erase(card, from, nr, arg); if (err == -EIO) goto out_retry; - if (err) + if (err) { + status = BLK_STS_IOERR; goto out; + } if (arg == MMC_SECURE_TRIM1_ARG) { if (card->quirks & MMC_QUIRK_INAND_CMD38) { @@ -1270,8 +1276,10 @@ retry: err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG); if (err == -EIO) goto out_retry; - if (err) + if (err) { + status = BLK_STS_IOERR; goto out; + } } out_retry: @@ -1280,7 +1288,7 @@ out_retry: if (!err) mmc_blk_reset_success(md, type); out: - blk_end_request(req, err, blk_rq_bytes(req)); + blk_end_request(req, status, blk_rq_bytes(req)); } static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) @@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) int ret = 0; ret = mmc_flush_cache(card); - if (ret) - ret = -EIO; 
- - blk_end_request_all(req, ret); + blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK); } /* @@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card, { if (mmc_card_removed(card)) req->rq_flags |= RQF_QUIET; - while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req))); + while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req))); mmc_queue_req_free(mq, mqrq); } @@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req, */ if (mmc_card_removed(mq->card)) { req->rq_flags |= RQF_QUIET; - blk_end_request_all(req, -EIO); + blk_end_request_all(req, BLK_STS_IOERR); mmc_queue_req_free(mq, mqrq); return; } @@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) */ mmc_blk_reset_success(md, type); - req_pending = blk_end_request(old_req, 0, + req_pending = blk_end_request(old_req, BLK_STS_OK, brq->data.bytes_xfered); /* * If the blk_end_request function returns non-zero even @@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) * time, so we only reach here after trying to * read a single sector. */ - req_pending = blk_end_request(old_req, -EIO, + req_pending = blk_end_request(old_req, BLK_STS_IOERR, brq->data.blksz); if (!req_pending) { mmc_queue_req_free(mq, mq_rq); @@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) ret = mmc_blk_part_switch(card, md); if (ret) { if (req) { - blk_end_request_all(req, -EIO); + blk_end_request_all(req, BLK_STS_IOERR); } goto out; } diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 5c37b6be3e7b..7f20298d892b 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q) if (!mq) { while ((req = blk_fetch_request(q)) != NULL) { req->rq_flags |= RQF_QUIET; - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); } return; } diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 6b8d5cd7dbf6..91c17fba7659 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev) } -static int do_blktrans_request(struct mtd_blktrans_ops *tr, +static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) { @@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, nsect = blk_rq_cur_bytes(req) >> tr->blkshift; buf = bio_data(req->bio); - if (req_op(req) == REQ_OP_FLUSH) - return tr->flush(dev); + if (req_op(req) == REQ_OP_FLUSH) { + if (tr->flush(dev)) + return BLK_STS_IOERR; + return BLK_STS_OK; + } if (blk_rq_pos(req) + blk_rq_cur_sectors(req) > get_capacity(req->rq_disk)) - return -EIO; + return BLK_STS_IOERR; switch (req_op(req)) { case REQ_OP_DISCARD: - return tr->discard(dev, block, nsect); + if (tr->discard(dev, block, nsect)) + return BLK_STS_IOERR; + return BLK_STS_OK; case REQ_OP_READ: for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->readsect(dev, block, buf)) - return -EIO; + return BLK_STS_IOERR; rq_flush_dcache_pages(req); - return 0; + return BLK_STS_OK; case REQ_OP_WRITE: if (!tr->writesect) - return -EIO; + return BLK_STS_IOERR; rq_flush_dcache_pages(req); for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->writesect(dev, block, buf)) - return -EIO; - return 0; + return BLK_STS_IOERR; default: - return -EIO; + return 
BLK_STS_IOERR; } } @@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work) spin_lock_irq(rq->queue_lock); while (1) { - int res; + blk_status_t res; dev->bg_stop = false; if (!req && !(req = blk_fetch_request(rq))) { @@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq) if (!dev) while ((req = blk_fetch_request(rq)) != NULL) - __blk_end_request_all(req, -ENODEV); + __blk_end_request_all(req, BLK_STS_IOERR); else queue_work(dev->wq, &dev->work); } diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 5497e65439df..3ecdb39d1985 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -313,7 +313,7 @@ static void ubiblock_do_work(struct work_struct *work) ret = ubiblock_read(pdu); rq_flush_dcache_pages(req); - blk_mq_end_request(req, ret); + blk_mq_end_request(req, errno_to_blk_status(ret)); } static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a60926410438..07e95c7d837a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -70,29 +70,21 @@ static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; -static int nvme_error_status(struct request *req) +static blk_status_t nvme_error_status(struct request *req) { switch (nvme_req(req)->status & 0x7ff) { case NVME_SC_SUCCESS: - return 0; + return BLK_STS_OK; case NVME_SC_CAP_EXCEEDED: - return -ENOSPC; - default: - return -EIO; - - /* - * XXX: these errors are a nasty side-band protocol to - * drivers/md/dm-mpath.c:noretry_error() that aren't documented - * anywhere.. - */ - case NVME_SC_CMD_SEQ_ERROR: - return -EILSEQ; + return BLK_STS_NOSPC; case NVME_SC_ONCS_NOT_SUPPORTED: - return -EOPNOTSUPP; + return BLK_STS_NOTSUPP; case NVME_SC_WRITE_FAULT: case NVME_SC_READ_ERROR: case NVME_SC_UNWRITTEN_BLOCK: - return -ENODATA; + return BLK_STS_MEDIUM; + default: + return BLK_STS_IOERR; } } @@ -555,15 +547,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, result, timeout); } -static void nvme_keep_alive_end_io(struct request *rq, int error) +static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; blk_mq_free_request(rq); - if (error) { + if (status) { dev_err(ctrl->device, - "failed nvme_keep_alive_end_io error=%d\n", error); + "failed nvme_keep_alive_end_io error=%d\n", + status); return; } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index f3885b5e56bd..2d7a2889866f 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, rqd->bio->bi_iter.bi_sector)); } -static void nvme_nvm_end_io(struct request *rq, int error) +static void nvme_nvm_end_io(struct request *rq, blk_status_t status) { struct nvm_rq *rqd = rq->end_io_data; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d52701df7245..819898428763 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -706,7 +706,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8) && !blk_rq_is_passthrough(req)) { - blk_mq_end_request(req, -EFAULT); + blk_mq_end_request(req, BLK_STS_NOTSUPP); return BLK_MQ_RQ_QUEUE_OK; } } @@ -939,7 +939,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); } -static void 
abort_endio(struct request *req, int error) +static void abort_endio(struct request *req, blk_status_t error) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; @@ -1586,7 +1586,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return nvme_create_io_queues(dev); } -static void nvme_del_queue_end(struct request *req, int error) +static void nvme_del_queue_end(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; @@ -1594,7 +1594,7 @@ static void nvme_del_queue_end(struct request *req, int error) complete(&nvmeq->dev->ioq_wait); } -static void nvme_del_cq_end(struct request *req, int error) +static void nvme_del_cq_end(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 6fb3fd5efc11..b7cbd5d2cdea 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) */ if (basedev->state < DASD_STATE_READY) { while ((req = blk_fetch_request(block->request_queue))) - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); return; } @@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "Rejecting write request %p", req); blk_start_request(req); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); continue; } if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) && @@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "Rejecting failfast request %p", req); blk_start_request(req); - __blk_end_request_all(req, -ETIMEDOUT); + __blk_end_request_all(req, BLK_STS_TIMEOUT); continue; } cqr = basedev->discipline->build_cp(basedev, block, req); @@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "on request %p", PTR_ERR(cqr), req); blk_start_request(req); - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); continue; } /* @@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) { struct request *req; int status; - int error = 0; + blk_status_t error = BLK_STS_OK; req = (struct request *) cqr->callback_data; dasd_profile_end(cqr->block, cqr, req); + status = cqr->block->base->discipline->free_cp(cqr, req); if (status < 0) - error = status; + error = errno_to_blk_status(status); else if (status == 0) { - if (cqr->intrc == -EPERM) - error = -EBADE; - else if (cqr->intrc == -ENOLINK || - cqr->intrc == -ETIMEDOUT) - error = cqr->intrc; - else - error = -EIO; + switch (cqr->intrc) { + case -EPERM: + error = BLK_STS_NEXUS; + break; + case -ENOLINK: + error = BLK_STS_TRANSPORT; + break; + case -ETIMEDOUT: + error = BLK_STS_TIMEOUT; + break; + default: + error = BLK_STS_IOERR; + break; + } } __blk_end_request_all(req, error); } @@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block) spin_lock_irq(&block->request_queue_lock); while ((req = blk_fetch_request(block->request_queue))) - __blk_end_request_all(req, -EIO); + __blk_end_request_all(req, BLK_STS_IOERR); spin_unlock_irq(&block->request_queue_lock); } diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 152de6817875..3c2c84b72877 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev, aob->request.data = (u64) aobrq; scmrq->bdev 
= bdev; scmrq->retries = 4; - scmrq->error = 0; + scmrq->error = BLK_STS_OK; /* We don't use all msbs - place aidaws at the end of the aob page. */ scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io]; scm_request_cluster_init(scmrq); @@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq) { struct aob *aob = scmrq->aob; - if (scmrq->error == -ETIMEDOUT) + if (scmrq->error == BLK_STS_TIMEOUT) SCM_LOG(1, "Request timeout"); else { SCM_LOG(1, "Request error"); @@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq) scmrq->error); } -void scm_blk_irq(struct scm_device *scmdev, void *data, int error) +void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error) { struct scm_request *scmrq = data; struct scm_blk_dev *bdev = scmrq->bdev; @@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq) struct scm_blk_dev *bdev = scmrq->bdev; unsigned long flags; - if (scmrq->error != -EIO) + if (scmrq->error != BLK_STS_IOERR) goto restart; /* For -EIO the response block is valid. */ diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h index 09218cdc5129..cd598d1a4eae 100644 --- a/drivers/s390/block/scm_blk.h +++ b/drivers/s390/block/scm_blk.h @@ -35,7 +35,7 @@ struct scm_request { struct aob *aob; struct list_head list; u8 retries; - int error; + blk_status_t error; #ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE struct { enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state; @@ -50,7 +50,7 @@ struct scm_request { int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *); void scm_blk_dev_cleanup(struct scm_blk_dev *); void scm_blk_set_available(struct scm_blk_dev *); -void scm_blk_irq(struct scm_device *, void *, int); +void scm_blk_irq(struct scm_device *, void *, blk_status_t); void scm_request_finish(struct scm_request *); void scm_request_requeue(struct scm_request *); diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index b3f44bc7f644..0f11f3bcac82 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch) struct eadm_private *private = get_eadm_private(sch); struct eadm_scsw *scsw = &sch->schib.scsw.eadm; struct irb *irb = this_cpu_ptr(&cio_irb); - int error = 0; + blk_status_t error = BLK_STS_OK; EADM_LOG(6, "irq"); EADM_LOG_HEX(6, irb, sizeof(*irb)); @@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch) if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) && scsw->eswf == 1 && irb->esw.eadm.erw.r) - error = -EIO; + error = BLK_STS_IOERR; if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC) - error = -ETIMEDOUT; + error = BLK_STS_TIMEOUT; eadm_subchannel_set_timeout(sch, 0); diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index 15268edc54ae..1fa53ecdc2aa 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv) } EXPORT_SYMBOL_GPL(scm_driver_unregister); -void scm_irq_handler(struct aob *aob, int error) +void scm_irq_handler(struct aob *aob, blk_status_t error) { struct aob_rq_header *aobrq = (void *) aob->request.data; struct scm_device *scmdev = aobrq->scmdev; diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 62fed9dc893e..35a69949f92d 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -214,7 +214,7 @@ static void jsfd_request(void) struct jsfd_part *jdp = req->rq_disk->private_data; unsigned long offset = 
blk_rq_pos(req) << 9; size_t len = blk_rq_cur_bytes(req); - int err = -EIO; + blk_status_t err = BLK_STS_IOERR; if ((offset + len) > jdp->dsize) goto end; @@ -230,7 +230,7 @@ static void jsfd_request(void) } jsfd_read(bio_data(req->bio), jdp->dbase + offset, len); - err = 0; + err = BLK_STS_OK; end: if (!__blk_end_request_cur(req, err)) req = jsfd_next_request(); diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 14785177ce7b..1e69a43b279d 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -446,7 +446,7 @@ static void _put_request(struct request *rq) * code paths. */ if (unlikely(rq->bio)) - blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq)); + blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq)); else blk_put_request(rq); } @@ -474,7 +474,7 @@ void osd_end_request(struct osd_request *or) EXPORT_SYMBOL(osd_end_request); static void _set_error_resid(struct osd_request *or, struct request *req, - int error) + blk_status_t error) { or->async_error = error; or->req_errors = scsi_req(req)->result; @@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req, int osd_execute_request(struct osd_request *or) { - int error; - blk_execute_rq(or->request->q, NULL, or->request, 0); - error = scsi_req(or->request)->result ? -EIO : 0; - _set_error_resid(or, or->request, error); - return error; + if (scsi_req(or->request)->result) { + _set_error_resid(or, or->request, BLK_STS_IOERR); + return -EIO; + } + + _set_error_resid(or, or->request, BLK_STS_OK); + return 0; } EXPORT_SYMBOL(osd_execute_request); -static void osd_request_async_done(struct request *req, int error) +static void osd_request_async_done(struct request *req, blk_status_t error) { struct osd_request *or = req->end_io_data; @@ -1914,7 +1916,7 @@ analyze: /* scsi sense is Empty, the request was never issued to target * linux return code might tell us what happened. 
*/ - if (or->async_error == -ENOMEM) + if (or->async_error == BLK_STS_RESOURCE) osi->osd_err_pri = OSD_ERR_PRI_RESOURCE; else osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 67cbed92f07d..d54689c9216e 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt) /* Wakeup from interrupt */ -static void osst_end_async(struct request *req, int update) +static void osst_end_async(struct request *req, blk_status_t status) { struct scsi_request *rq = scsi_req(req); struct osst_request *SRpnt = req->end_io_data; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index ecc07dab893d..44904f41924c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) } } -static void eh_lock_door_done(struct request *req, int uptodate) +static void eh_lock_door_done(struct request *req, blk_status_t status) { __blk_put_request(req->q, req); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 884aaa84c2dd..67a67191520f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -635,7 +635,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd) cmd->request->next_rq->special = NULL; } -static bool scsi_end_request(struct request *req, int error, +static bool scsi_end_request(struct request *req, blk_status_t error, unsigned int bytes, unsigned int bidi_bytes) { struct scsi_cmnd *cmd = req->special; @@ -694,45 +694,28 @@ static bool scsi_end_request(struct request *req, int error, * @cmd: SCSI command (unused) * @result: scsi error code * - * Translate SCSI error code into standard UNIX errno. - * Return values: - * -ENOLINK temporary transport failure - * -EREMOTEIO permanent target failure, do not retry - * -EBADE permanent nexus failure, retry on other path - * -ENOSPC No write space available - * -ENODATA Medium error - * -EIO unspecified I/O error + * Translate SCSI error code into block errors. */ -static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) +static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, + int result) { - int error = 0; - - switch(host_byte(result)) { + switch (host_byte(result)) { case DID_TRANSPORT_FAILFAST: - error = -ENOLINK; - break; + return BLK_STS_TRANSPORT; case DID_TARGET_FAILURE: set_host_byte(cmd, DID_OK); - error = -EREMOTEIO; - break; + return BLK_STS_TARGET; case DID_NEXUS_FAILURE: - set_host_byte(cmd, DID_OK); - error = -EBADE; - break; + return BLK_STS_NEXUS; case DID_ALLOC_FAILURE: set_host_byte(cmd, DID_OK); - error = -ENOSPC; - break; + return BLK_STS_NOSPC; case DID_MEDIUM_ERROR: set_host_byte(cmd, DID_OK); - error = -ENODATA; - break; + return BLK_STS_MEDIUM; default: - error = -EIO; - break; + return BLK_STS_IOERR; } - - return error; } /* @@ -769,7 +752,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) int result = cmd->result; struct request_queue *q = cmd->device->request_queue; struct request *req = cmd->request; - int error = 0; + blk_status_t error = BLK_STS_OK; struct scsi_sense_hdr sshdr; bool sense_valid = false; int sense_deferred = 0, level = 0; @@ -808,7 +791,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) * both sides at once. 
*/ scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid; - if (scsi_end_request(req, 0, blk_rq_bytes(req), + if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req), blk_rq_bytes(req->next_rq))) BUG(); return; @@ -850,7 +833,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_print_sense(cmd); result = 0; /* for passthrough error may be set */ - error = 0; + error = BLK_STS_OK; } /* @@ -922,18 +905,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) action = ACTION_REPREP; } else if (sshdr.asc == 0x10) /* DIX */ { action = ACTION_FAIL; - error = -EILSEQ; + error = BLK_STS_PROTECTION; /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { action = ACTION_FAIL; - error = -EREMOTEIO; + error = BLK_STS_TARGET; } else action = ACTION_FAIL; break; case ABORTED_COMMAND: action = ACTION_FAIL; if (sshdr.asc == 0x10) /* DIF */ - error = -EILSEQ; + error = BLK_STS_PROTECTION; break; case NOT_READY: /* If the device is in the process of becoming diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index d16414bfe2ef..cc970c811bcb 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -172,7 +172,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost, struct sas_rphy *rphy) { struct request *req; - int ret; + blk_status_t ret; int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); while ((req = blk_fetch_request(q)) != NULL) { diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 82c33a6edbea..f3387c6089c5 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ } Sg_device; /* tasklet or soft irq callback */ -static void sg_rq_end_io(struct request *rq, int uptodate); +static void sg_rq_end_io(struct request *rq, blk_status_t status); static int sg_start_req(Sg_request *srp, unsigned char *cmd); static int sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); @@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, if (atomic_read(&sdp->detaching)) { if (srp->bio) { scsi_req_free_cmd(scsi_req(srp->rq)); - blk_end_request_all(srp->rq, -EIO); + blk_end_request_all(srp->rq, BLK_STS_IOERR); srp->rq = NULL; } @@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work) * level when a command is completed (or has failed). 
*/ static void -sg_rq_end_io(struct request *rq, int uptodate) +sg_rq_end_io(struct request *rq, blk_status_t status) { struct sg_request *srp = rq->end_io_data; struct scsi_request *req = scsi_req(rq); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 1ea34d6f5437..6b1c4ac54e66 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) atomic64_dec(&STp->stats->in_flight); } -static void st_scsi_execute_end(struct request *req, int uptodate) +static void st_scsi_execute_end(struct request *req, blk_status_t status) { struct st_request *SRpnt = req->end_io_data; struct scsi_request *rq = scsi_req(req); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 3e4abb13f8ea..323ab47645d0 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev) } static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); -static void pscsi_req_done(struct request *, int); +static void pscsi_req_done(struct request *, blk_status_t); /* pscsi_attach_hba(): * @@ -1045,7 +1045,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev) return 0; } -static void pscsi_req_done(struct request *req, int uptodate) +static void pscsi_req_done(struct request *req, blk_status_t status) { struct se_cmd *cmd = req->end_io_data; struct pscsi_plugin_task *pt = cmd->priv; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fcd641032f8d..0cf6735046d3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -230,8 +230,8 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); -void blk_mq_end_request(struct request *rq, int error); -void __blk_mq_end_request(struct request *rq, int error); +void blk_mq_end_request(struct request *rq, blk_status_t error); +void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 61339bc44400..59378939a8cd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,6 +17,22 @@ struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); +/* + * Block error status values. See block/blk-core:blk_errors for the details. 
+ */ +typedef u8 __bitwise blk_status_t; +#define BLK_STS_OK 0 +#define BLK_STS_NOTSUPP ((__force blk_status_t)1) +#define BLK_STS_TIMEOUT ((__force blk_status_t)2) +#define BLK_STS_NOSPC ((__force blk_status_t)3) +#define BLK_STS_TRANSPORT ((__force blk_status_t)4) +#define BLK_STS_TARGET ((__force blk_status_t)5) +#define BLK_STS_NEXUS ((__force blk_status_t)6) +#define BLK_STS_MEDIUM ((__force blk_status_t)7) +#define BLK_STS_PROTECTION ((__force blk_status_t)8) +#define BLK_STS_RESOURCE ((__force blk_status_t)9) +#define BLK_STS_IOERR ((__force blk_status_t)10) + struct blk_issue_stat { u64 stat; }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 019f18c65098..2a8871638453 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -55,7 +55,7 @@ struct blk_stat_callback; */ #define BLKCG_MAX_POLS 3 -typedef void (rq_end_io_fn)(struct request *, int); +typedef void (rq_end_io_fn)(struct request *, blk_status_t); #define BLK_RL_SYNCFULL (1U << 0) #define BLK_RL_ASYNCFULL (1U << 1) @@ -940,7 +940,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); extern void blk_rq_unprep_clone(struct request *rq); -extern int blk_insert_cloned_request(struct request_queue *q, +extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_delay_queue(struct request_queue *, unsigned long); @@ -980,6 +980,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +int blk_status_to_errno(blk_status_t status); +blk_status_t errno_to_blk_status(int errno); + bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) @@ -1112,16 +1115,16 @@ extern struct request *blk_fetch_request(struct request_queue *q); * blk_end_request() for parts of the original function. * This prevents code duplication in drivers. 
*/ -extern bool blk_update_request(struct request *rq, int error, +extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_finish_request(struct request *rq, int error); -extern bool blk_end_request(struct request *rq, int error, +extern void blk_finish_request(struct request *rq, blk_status_t error); +extern bool blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_end_request_all(struct request *rq, int error); -extern bool __blk_end_request(struct request *rq, int error, +extern void blk_end_request_all(struct request *rq, blk_status_t error); +extern bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void __blk_end_request_all(struct request *rq, int error); -extern bool __blk_end_request_cur(struct request *rq, int error); +extern void __blk_end_request_all(struct request *rq, blk_status_t error); +extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index dec227acc13b..5de5c53251ec 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -74,7 +74,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone); typedef int (*dm_endio_fn) (struct dm_target *ti, struct bio *bio, int *error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, - struct request *clone, int error, + struct request *clone, blk_status_t error, union map_info *map_context); typedef void (*dm_presuspend_fn) (struct dm_target *ti); diff --git a/include/linux/ide.h b/include/linux/ide.h index 6980ca322074..dc152e4b7f73 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -671,7 +671,7 @@ struct ide_port_ops { void (*init_dev)(ide_drive_t *); void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); void (*set_dma_mode)(struct hwif_s *, ide_drive_t *); - int (*reset_poll)(ide_drive_t *); + blk_status_t (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); void (*resetproc)(ide_drive_t *); void (*maskproc)(ide_drive_t *, int); @@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l extern int ide_vlb_clk; extern int ide_pci_clk; -int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int); +int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int); void ide_kill_rq(ide_drive_t *, struct request *); void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); @@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, int arg); void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8); -int ide_complete_rq(ide_drive_t *, int, unsigned int); +int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int); void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd); void ide_tf_dump(const char *, struct ide_cmd *); diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index a09cca829082..a29d3086eb56 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -157,7 +157,7 @@ struct osd_request { osd_req_done_fn *async_done; void *async_private; - int async_error; + blk_status_t async_error; int req_errors; }; -- cgit v1.2.3
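For reference, here is a minimal sketch of how a driver is expected to consume the new type (this is not code from the series; example_complete_rq and hw_errno are invented names). The idea is that a low-level errno is translated exactly once, at the driver boundary, and only blk_status_t values travel through the block layer:

    #include <linux/blkdev.h>
    #include <linux/blk-mq.h>

    /* Hypothetical completion path for a blk-mq driver. */
    static void example_complete_rq(struct request *rq, int hw_errno)
    {
            /* Map the transport errno to a block status exactly once. */
            blk_status_t status = errno_to_blk_status(hw_errno);

            blk_mq_end_request(rq, status);
    }

If a status ever has to be handed back as a user-visible errno, for example in a legacy ioctl path, blk_status_to_errno() performs the reverse mapping.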
From fc17b6534eb8395f0b3133eb31d87deec32c642b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:38:05 +0200 Subject: blk-mq: switch ->queue_rq return value to blk_status_t Use the same values for request completion errors as for the return value from ->queue_rq. BLK_STS_RESOURCE is special-cased to cause a requeue, and all the others are completed as-is. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 37 ++++++++++++++++------------------ drivers/block/loop.c | 6 +++--- drivers/block/mtip32xx/mtip32xx.c | 17 ++++++++-------- drivers/block/nbd.c | 12 ++++------- drivers/block/null_blk.c | 4 ++-- drivers/block/rbd.c | 4 ++-- drivers/block/virtio_blk.c | 10 +++++----- drivers/block/xen-blkfront.c | 8 ++++---- drivers/md/dm-rq.c | 8 ++++---- drivers/mtd/ubi/block.c | 6 +++--- drivers/nvme/host/core.c | 14 ++++++------- drivers/nvme/host/fc.c | 23 +++++++++++---------- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/pci.c | 42 +++++++++++++++++++-------------------- drivers/nvme/host/rdma.c | 26 +++++++++++++----------- drivers/nvme/target/loop.c | 17 ++++++++-------- drivers/scsi/scsi_lib.c | 30 ++++++++++++++-------------- include/linux/blk-mq.h | 7 ++----- 18 files changed, 131 insertions(+), 142 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index adcc1c0dce6e..7af78b1e9db9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -924,7 +924,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) { struct blk_mq_hw_ctx *hctx; struct request *rq; - int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; + int errors, queued; if (list_empty(list)) return false; @@ -935,6 +935,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) errors = queued = 0; do { struct blk_mq_queue_data bd; + blk_status_t ret; rq = list_first_entry(list, struct request, queuelist); if (!blk_mq_get_driver_tag(rq, &hctx, false)) { @@ -975,25 +976,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) } ret = q->mq_ops->queue_rq(hctx, &bd); - switch (ret) { - case BLK_MQ_RQ_QUEUE_OK: - queued++; - break; - case BLK_MQ_RQ_QUEUE_BUSY: + if (ret == BLK_STS_RESOURCE) { blk_mq_put_driver_tag_hctx(hctx, rq); list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); break; - default: - pr_err("blk-mq: bad return on queue: %d\n", ret); - case BLK_MQ_RQ_QUEUE_ERROR: + } + + if (unlikely(ret != BLK_STS_OK)) { errors++; blk_mq_end_request(rq, BLK_STS_IOERR); - break; + continue; } - if (ret == BLK_MQ_RQ_QUEUE_BUSY) - break; + queued++; } while (!list_empty(list)); hctx->dispatched[queued_to_index(queued)]++; @@ -1031,7 +1027,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) * - blk_mq_run_hw_queue() checks whether or not a queue has * been stopped before rerunning a queue. * - Some but not all block drivers stop a queue before - * returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq + * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq * and dm-rq. 
*/ if (!blk_mq_sched_needs_restart(hctx) && @@ -1410,7 +1406,7 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, }; struct blk_mq_hw_ctx *hctx; blk_qc_t new_cookie; - int ret; + blk_status_t ret; if (q->elevator) goto insert; @@ -1426,18 +1422,19 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, * would have done */ ret = q->mq_ops->queue_rq(hctx, &bd); - if (ret == BLK_MQ_RQ_QUEUE_OK) { + switch (ret) { + case BLK_STS_OK: *cookie = new_cookie; return; - } - - if (ret == BLK_MQ_RQ_QUEUE_ERROR) { + case BLK_STS_RESOURCE: + __blk_mq_requeue_request(rq); + goto insert; + default: *cookie = BLK_QC_T_NONE; - blk_mq_end_request(rq, BLK_STS_IOERR); + blk_mq_end_request(rq, ret); return; } - __blk_mq_requeue_request(rq); insert: blk_mq_sched_insert_request(rq, false, true, false, may_sleep); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4caf6338c012..70fd7e0de0fa 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1674,7 +1674,7 @@ int loop_unregister_transfer(int number) EXPORT_SYMBOL(loop_register_transfer); EXPORT_SYMBOL(loop_unregister_transfer); -static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); @@ -1683,7 +1683,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (lo->lo_state != Lo_bound) - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; switch (req_op(cmd->rq)) { case REQ_OP_FLUSH: @@ -1698,7 +1698,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, kthread_queue_work(&lo->worker, &cmd->work); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } static void loop_handle_cmd(struct loop_cmd *cmd) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index ee6f66bb50c7..d8618a71da74 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3633,8 +3633,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, return false; } -static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, - struct request *rq) +static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, + struct request *rq) { struct driver_data *dd = hctx->queue->queuedata; struct mtip_int_cmd *icmd = rq->special; @@ -3642,7 +3642,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, struct mtip_cmd_sg *command_sg; if (mtip_commands_active(dd->port)) - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; /* Populate the SG list */ cmd->command_header->opts = @@ -3666,10 +3666,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); mtip_issue_non_ncq_command(dd->port, rq->tag); - return BLK_MQ_RQ_QUEUE_OK; + return 0; } -static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *rq = bd->rq; @@ -3681,15 +3681,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, return mtip_issue_reserved_cmd(hctx, rq); if (unlikely(mtip_check_unal_depth(hctx, rq))) - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; blk_mq_start_request(rq); ret = mtip_submit_request(hctx, rq); if (likely(!ret)) - return BLK_MQ_RQ_QUEUE_OK; - - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_OK; + return BLK_STS_IOERR; } static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, diff --git 
a/drivers/block/nbd.c b/drivers/block/nbd.c index 978d2d2d08d6..36839dc45472 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -469,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) nsock->pending = req; nsock->sent = sent; } - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); @@ -510,7 +510,7 @@ send_pages: */ nsock->pending = req; nsock->sent = sent; - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; } dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", @@ -798,7 +798,7 @@ out: return ret; } -static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); @@ -822,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, * appropriate. */ ret = nbd_handle_cmd(cmd, hctx->queue_num); - if (ret < 0) - ret = BLK_MQ_RQ_QUEUE_ERROR; - if (!ret) - ret = BLK_MQ_RQ_QUEUE_OK; complete(&cmd->send_complete); - return ret; + return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK; } static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index e6b81d370882..586dfff5d53f 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -356,7 +356,7 @@ static void null_request_fn(struct request_queue *q) } } -static int null_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); @@ -373,7 +373,7 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); null_handle_cmd(cmd); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3e8b43d792c2..74a6791b15c8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4154,14 +4154,14 @@ err: blk_mq_end_request(rq, errno_to_blk_status(result)); } -static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *rq = bd->rq; struct work_struct *work = blk_mq_rq_to_pdu(rq); queue_work(rbd_wq, work); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } static void rbd_free_disk(struct rbd_device *rbd_dev) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 205b74d70efc..e59bd4549a8a 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq) spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); } -static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct virtio_blk *vblk = hctx->queue->queuedata; @@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, break; default: WARN_ON_ONCE(1); - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; } vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); @@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, /* Out of mem doesn't actually happen, since we fall back * to direct descriptors */ if (err == -ENOMEM || err == -ENOSPC) - return BLK_MQ_RQ_QUEUE_BUSY; - return BLK_MQ_RQ_QUEUE_ERROR; + return 
BLK_STS_RESOURCE; + return BLK_STS_IOERR; } if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) @@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, if (notify) virtqueue_notify(vblk->vqs[qid].vq); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } /* return id (s/n) string for *disk to *id_str diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index aedc3c759273..2f468cf86dcf 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -881,7 +881,7 @@ static inline bool blkif_request_flush_invalid(struct request *req, !info->feature_fua)); } -static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *qd) { unsigned long flags; @@ -904,16 +904,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, flush_requests(rinfo); spin_unlock_irqrestore(&rinfo->ring_lock, flags); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; out_err: spin_unlock_irqrestore(&rinfo->ring_lock, flags); - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; out_busy: spin_unlock_irqrestore(&rinfo->ring_lock, flags); blk_mq_stop_hw_queue(hctx); - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; } static void blkif_complete_rq(struct request *rq) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index bee334389173..63402f8a38de 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, return __dm_rq_init_rq(set->driver_data, rq); } -static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *rq = bd->rq; @@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, } if (ti->type->busy && ti->type->busy(ti)) - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; dm_start_request(md, rq); @@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, rq_end_stats(md, rq); rq_completed(md, rq_data_dir(rq), false); blk_mq_delay_run_hw_queue(hctx, 100/*ms*/); - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; } - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } static const struct blk_mq_ops dm_mq_ops = { diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 3ecdb39d1985..c3963f880448 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -316,7 +316,7 @@ static void ubiblock_do_work(struct work_struct *work) blk_mq_end_request(req, errno_to_blk_status(ret)); } -static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *req = bd->rq; @@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, case REQ_OP_READ: ubi_sgl_init(&pdu->usgl); queue_work(dev->wq, &pdu->work); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; default: - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; } } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 07e95c7d837a..4e193b93d1d9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -283,7 +283,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns, cmnd->common.nsid = cpu_to_le32(ns->ns_id); } -static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, +static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_command 
*cmnd) { unsigned short segments = blk_rq_nr_discard_segments(req), n = 0; @@ -292,7 +292,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); if (!range) - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; __rq_for_each_bio(bio, req) { u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); @@ -306,7 +306,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, if (WARN_ON_ONCE(n != segments)) { kfree(range); - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; } memset(cmnd, 0, sizeof(*cmnd)); @@ -320,7 +320,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, req->special_vec.bv_len = sizeof(*range) * segments; req->rq_flags |= RQF_SPECIAL_PAYLOAD; - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, @@ -364,10 +364,10 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); } -int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, +blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd) { - int ret = BLK_MQ_RQ_QUEUE_OK; + blk_status_t ret = BLK_STS_OK; if (!(req->rq_flags & RQF_DONTPREP)) { nvme_req(req)->retries = 0; @@ -394,7 +394,7 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, break; default: WARN_ON_ONCE(1); - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; } cmd->common.command_id = req->tag; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5b14cbefb724..eb0973ac9e17 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1873,7 +1873,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, * level FC exchange resource that is also outstanding. This must be * considered in all cleanup operations. */ -static int +static blk_status_t nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, u32 data_len, enum nvmefc_fcp_datadir io_dir) @@ -1888,10 +1888,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, * the target device is present */ if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; if (!nvme_fc_ctrl_get(ctrl)) - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; /* format the FC-NVME CMD IU and fcp_req */ cmdiu->connection_id = cpu_to_be64(queue->connection_id); @@ -1939,8 +1939,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (ret < 0) { nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); - return (ret == -ENOMEM || ret == -EAGAIN) ? 
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; + if (ret == -ENOMEM || ret == -EAGAIN) + return BLK_STS_RESOURCE; + return BLK_STS_IOERR; } } @@ -1966,19 +1967,19 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, nvme_fc_ctrl_put(ctrl); if (ret != -EBUSY) - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; if (op->rq) { blk_mq_stop_hw_queues(op->rq->q); blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); } - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; } - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } -static int +static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1991,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_command *sqe = &cmdiu->sqe; enum nvmefc_fcp_datadir io_dir; u32 data_len; - int ret; + blk_status_t ret; ret = nvme_setup_cmd(ns, rq, sqe); if (ret) @@ -2046,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) struct nvme_fc_fcp_op *aen_op; unsigned long flags; bool terminating = false; - int ret; + blk_status_t ret; if (aer_idx > NVME_FC_NR_AEN_COMMANDS) return; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9d6a070d4391..22ee60b2a3e8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -296,7 +296,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid); -int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, +blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 819898428763..430d085af31c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -427,7 +427,7 @@ static __le64 **iod_list(struct request *req) return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); } -static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) +static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); int nseg = blk_rq_nr_phys_segments(rq); @@ -436,7 +436,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); if (!iod->sg) - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; } else { iod->sg = iod->inline_sg; } @@ -446,7 +446,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->nents = 0; iod->length = size; - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } static void nvme_free_iod(struct nvme_dev *dev, struct request *req) @@ -616,21 +616,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req) return true; } -static int nvme_map_data(struct nvme_dev *dev, struct request *req, +static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, struct nvme_command *cmnd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct request_queue *q = req->q; enum dma_data_direction dma_dir = rq_data_dir(req) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; - int ret = BLK_MQ_RQ_QUEUE_ERROR; + blk_status_t ret = BLK_STS_IOERR; sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(q, req, iod->sg); if (!iod->nents) goto out; - ret = BLK_MQ_RQ_QUEUE_BUSY; + ret = BLK_STS_RESOURCE; if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, DMA_ATTR_NO_WARN)) goto out; @@ -638,7 +638,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, if (!nvme_setup_prps(dev, req)) goto out_unmap; - ret = BLK_MQ_RQ_QUEUE_ERROR; + ret = BLK_STS_IOERR; if (blk_integrity_rq(req)) { if (blk_rq_count_integrity_sg(q, req->bio) != 1) goto out_unmap; @@ -658,7 +658,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma); if (blk_integrity_rq(req)) cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; out_unmap: dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); @@ -688,7 +688,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) /* * NOTE: ns is NULL when called on the admin queue. */ -static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nvme_ns *ns = hctx->queue->queuedata; @@ -696,7 +696,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_command cmnd; - int ret = BLK_MQ_RQ_QUEUE_OK; + blk_status_t ret = BLK_STS_OK; /* * If formated with metadata, require the block layer provide a buffer @@ -705,38 +705,36 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, */ if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8) && - !blk_rq_is_passthrough(req)) { - blk_mq_end_request(req, BLK_STS_NOTSUPP); - return BLK_MQ_RQ_QUEUE_OK; - } + !blk_rq_is_passthrough(req)) + return BLK_STS_NOTSUPP; } ret = nvme_setup_cmd(ns, req, &cmnd); - if (ret != BLK_MQ_RQ_QUEUE_OK) + if (ret) return ret; ret = nvme_init_iod(req, dev); - if (ret != BLK_MQ_RQ_QUEUE_OK) + if (ret) goto out_free_cmd; - if (blk_rq_nr_phys_segments(req)) + if (blk_rq_nr_phys_segments(req)) { ret = nvme_map_data(dev, req, &cmnd); - - if (ret != BLK_MQ_RQ_QUEUE_OK) - goto out_cleanup_iod; + if (ret) + goto out_cleanup_iod; + } blk_mq_start_request(req); spin_lock_irq(&nvmeq->q_lock); if (unlikely(nvmeq->cq_vector < 0)) { - ret = BLK_MQ_RQ_QUEUE_ERROR; + ret = BLK_STS_IOERR; spin_unlock_irq(&nvmeq->q_lock); goto out_cleanup_iod; } __nvme_submit_cmd(nvmeq, &cmnd); nvme_process_cq(nvmeq); spin_unlock_irq(&nvmeq->q_lock); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); out_free_cmd: diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 28bd255c144d..58d311e704e5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1448,7 +1448,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, return true; } -static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nvme_ns *ns = hctx->queue->queuedata; @@ -1459,27 +1459,28 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_command *c = sqe->data; bool flush = false; struct ib_device *dev; - int ret; + blk_status_t ret; + int err; WARN_ON_ONCE(rq->tag < 0); if (!nvme_rdma_queue_is_ready(queue, rq)) - 
return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; dev = queue->device->dev; ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); ret = nvme_setup_cmd(ns, rq, c); - if (ret != BLK_MQ_RQ_QUEUE_OK) + if (ret) return ret; blk_mq_start_request(rq); - ret = nvme_rdma_map_data(queue, rq, c); - if (ret < 0) { + err = nvme_rdma_map_data(queue, rq, c); + if (err < 0) { dev_err(queue->ctrl->ctrl.device, - "Failed to map data (%d)\n", ret); + "Failed to map data (%d)\n", err); nvme_cleanup_cmd(rq); goto err; } @@ -1489,17 +1490,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, if (req_op(rq) == REQ_OP_FLUSH) flush = true; - ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, + err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); - if (ret) { + if (err) { nvme_rdma_unmap_data(queue, rq); goto err; } - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; err: - return (ret == -ENOMEM || ret == -EAGAIN) ? - BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; + if (err == -ENOMEM || err == -EAGAIN) + return BLK_STS_RESOURCE; + return BLK_STS_IOERR; } static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index e503cfff0337..db8ebadf885b 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -159,17 +159,17 @@ nvme_loop_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } -static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nvme_ns *ns = hctx->queue->queuedata; struct nvme_loop_queue *queue = hctx->driver_data; struct request *req = bd->rq; struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); - int ret; + blk_status_t ret; ret = nvme_setup_cmd(ns, req, &iod->cmd); - if (ret != BLK_MQ_RQ_QUEUE_OK) + if (ret) return ret; iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; @@ -179,16 +179,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_cleanup_cmd(req); blk_mq_start_request(req); nvme_loop_queue_response(&iod->req); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } if (blk_rq_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; - ret = sg_alloc_table_chained(&iod->sg_table, + if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), - iod->sg_table.sgl); - if (ret) - return BLK_MQ_RQ_QUEUE_BUSY; + iod->sg_table.sgl)) + return BLK_STS_RESOURCE; iod->req.sg = iod->sg_table.sgl; iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); @@ -197,7 +196,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(req); schedule_work(&iod->work); - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; } static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 67a67191520f..b5f310b9e910 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1812,15 +1812,15 @@ out_delay: blk_delay_queue(q, SCSI_QUEUE_DELAY); } -static inline int prep_to_mq(int ret) +static inline blk_status_t prep_to_mq(int ret) { switch (ret) { case BLKPREP_OK: - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; case BLKPREP_DEFER: - return BLK_MQ_RQ_QUEUE_BUSY; + return BLK_STS_RESOURCE; default: - return BLK_MQ_RQ_QUEUE_ERROR; + return BLK_STS_IOERR; } } @@ -1892,7 +1892,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd) 
blk_mq_complete_request(cmd->request); } -static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, +static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *req = bd->rq; @@ -1900,14 +1900,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost = sdev->host; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - int ret; + blk_status_t ret; int reason; ret = prep_to_mq(scsi_prep_state_check(sdev, req)); - if (ret != BLK_MQ_RQ_QUEUE_OK) + if (ret != BLK_STS_OK) goto out; - ret = BLK_MQ_RQ_QUEUE_BUSY; + ret = BLK_STS_RESOURCE; if (!get_device(&sdev->sdev_gendev)) goto out; @@ -1920,7 +1920,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!(req->rq_flags & RQF_DONTPREP)) { ret = prep_to_mq(scsi_mq_prep_fn(req)); - if (ret != BLK_MQ_RQ_QUEUE_OK) + if (ret != BLK_STS_OK) goto out_dec_host_busy; req->rq_flags |= RQF_DONTPREP; } else { @@ -1938,11 +1938,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, reason = scsi_dispatch_cmd(cmd); if (reason) { scsi_set_blocked(cmd, reason); - ret = BLK_MQ_RQ_QUEUE_BUSY; + ret = BLK_STS_RESOURCE; goto out_dec_host_busy; } - return BLK_MQ_RQ_QUEUE_OK; + return BLK_STS_OK; out_dec_host_busy: atomic_dec(&shost->host_busy); @@ -1955,12 +1955,14 @@ out_put_device: put_device(&sdev->sdev_gendev); out: switch (ret) { - case BLK_MQ_RQ_QUEUE_BUSY: + case BLK_STS_OK: + break; + case BLK_STS_RESOURCE: if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev)) blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY); break; - case BLK_MQ_RQ_QUEUE_ERROR: + default: /* * Make sure to release all allocated ressources when * we hit an error, as we will never see this command @@ -1969,8 +1971,6 @@ out: if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); break; - default: - break; } return ret; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0cf6735046d3..b144b7b0e104 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -87,7 +87,8 @@ struct blk_mq_queue_data { bool last; }; -typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, + const struct blk_mq_queue_data *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); @@ -155,10 +156,6 @@ struct blk_mq_ops { }; enum { - BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */ - BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */ - BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */ - BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, -- cgit v1.2.3
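The new contract is easiest to see in a toy ->queue_rq implementation (a sketch under assumptions, not code from this series; example_hw_submit is an invented hardware hook):

    #include <linux/blk-mq.h>

    /* Invented submission hook: returns 0 on success or a negative errno. */
    static int example_hw_submit(struct request *rq);

    static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                         const struct blk_mq_queue_data *bd)
    {
            struct request *rq = bd->rq;
            int err;

            blk_mq_start_request(rq);

            err = example_hw_submit(rq);
            if (err == -ENOMEM || err == -EAGAIN)
                    return BLK_STS_RESOURCE;  /* blk-mq requeues and retries */
            if (err)
                    return BLK_STS_IOERR;     /* request is failed outright */

            return BLK_STS_OK;  /* accepted; completion arrives asynchronously */
    }

This mirrors the virtio_blk and nvme-rdma hunks above: resource-type errnos from the transport become BLK_STS_RESOURCE so the core requeues the request, and everything else is completed as an I/O error.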
From 4e4cbee93d56137ebff722be022cae5f70ef84fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:38:06 +0200 Subject: block: switch bios to blk_status_t Replace bi_error with a new bi_status to allow for a clear conversion. Note that device mapper overloaded bi_error with a private value, which we'll have to keep around at least for now and thus propagate to a proper blk_status_t value. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio-integrity.c | 8 ++-- block/bio.c | 8 ++-- block/blk-core.c | 20 ++++++---- block/blk-integrity.c | 4 +- block/bounce.c | 4 +- block/t10-pi.c | 30 +++++++-------- drivers/block/aoe/aoecmd.c | 10 ++--- drivers/block/aoe/aoedev.c | 2 +- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 6 +-- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 6 +-- drivers/block/drbd/drbd_req.c | 6 +-- drivers/block/drbd/drbd_worker.c | 16 ++++---- drivers/block/floppy.c | 4 +- drivers/block/pktcdvd.c | 18 ++++----- drivers/block/ps3vram.c | 14 +++---- drivers/block/rsxx/dev.c | 14 +++---- drivers/block/rsxx/dma.c | 13 +++---- drivers/block/rsxx/rsxx_priv.h | 2 +- drivers/block/umem.c | 2 +- drivers/block/xen-blkback/blkback.c | 19 ++++------ drivers/block/xen-blkfront.c | 2 +- drivers/lightnvm/pblk-core.c | 4 +- drivers/lightnvm/pblk-read.c | 4 +- drivers/lightnvm/pblk-write.c | 2 +- drivers/lightnvm/rrpc.c | 8 ++-- drivers/md/bcache/bcache.h | 7 ++-- drivers/md/bcache/btree.c | 6 +-- drivers/md/bcache/io.c | 6 +-- drivers/md/bcache/journal.c | 2 +- drivers/md/bcache/movinggc.c | 10 ++--- drivers/md/bcache/request.c | 28 +++++++------- drivers/md/bcache/request.h | 2 +- drivers/md/bcache/super.c | 6 +-- drivers/md/bcache/writeback.c | 4 +- drivers/md/dm-bio-prison-v1.c | 4 +- drivers/md/dm-bio-prison-v1.h | 2 +- drivers/md/dm-bufio.c | 28 +++++++------- drivers/md/dm-cache-target.c | 34 +++++++++-------- drivers/md/dm-crypt.c | 34 ++++++++--------- drivers/md/dm-flakey.c | 5 ++- drivers/md/dm-integrity.c | 18 ++++----- drivers/md/dm-io.c | 10 ++--- drivers/md/dm-log-writes.c | 7 ++-- drivers/md/dm-mpath.c | 15 ++++---- drivers/md/dm-raid1.c | 13 ++++--- drivers/md/dm-rq.c | 2 +- drivers/md/dm-snap.c | 5 ++- drivers/md/dm-stripe.c | 5 ++- drivers/md/dm-thin.c | 65 ++++++++++++++++---------------- drivers/md/dm-verity-target.c | 10 ++--- drivers/md/dm.c | 40 ++++++++++---------- drivers/md/md.c | 8 ++-- drivers/md/multipath.c | 10 ++--- drivers/md/raid1.c | 36 +++++++++--------- drivers/md/raid10.c | 36 +++++++++--------- drivers/md/raid5-cache.c | 4 +- drivers/md/raid5-ppl.c | 2 +- drivers/md/raid5.c | 22 +++++------ drivers/nvdimm/blk.c | 4 +- drivers/nvdimm/btt.c | 4 +- drivers/nvdimm/pmem.c | 28 +++++++------- drivers/nvme/target/io-cmd.c | 4 +- drivers/target/target_core_iblock.c | 10 ++--- fs/block_dev.c | 18 +++++---- fs/btrfs/btrfs_inode.h | 3 +- fs/btrfs/check-integrity.c | 4 +- fs/btrfs/compression.c | 44 +++++++++++----------- fs/btrfs/compression.h | 4 +- fs/btrfs/ctree.h | 6 +-- fs/btrfs/disk-io.c | 75 ++++++++++++++++++------------------- fs/btrfs/disk-io.h | 12 +++--- fs/btrfs/extent_io.c | 23 +++++++----- fs/btrfs/extent_io.h | 6 +-- fs/btrfs/file-item.c | 14 +++---- fs/btrfs/inode.c | 73 ++++++++++++++++++------------------ fs/btrfs/raid56.c | 16 ++++---- fs/btrfs/scrub.c | 26 ++++++------- fs/btrfs/volumes.c | 11 +++--- fs/buffer.c | 2 +- fs/crypto/bio.c | 2 +- fs/direct-io.c | 8 ++-- fs/ext4/page-io.c | 13 ++++--- fs/ext4/readpage.c | 4 +- fs/f2fs/data.c | 10 ++--- fs/f2fs/segment.c | 2 +- fs/gfs2/lops.c | 8 ++-- fs/gfs2/meta_io.c | 2 +- fs/gfs2/ops_fstype.c | 4 +- fs/iomap.c | 4 +- fs/jfs/jfs_logmgr.c | 2 +- fs/jfs/jfs_metapage.c | 4 +- fs/mpage.c | 3 +- fs/nfs/blocklayout/blocklayout.c | 4 +- fs/nilfs2/segbuf.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 6 +-- fs/xfs/xfs_aops.c | 7 ++-- fs/xfs/xfs_buf.c | 7 +++- include/linux/bio.h | 2 +- include/linux/blk_types.h | 5 ++- 
include/linux/blkdev.h | 2 +- include/linux/device-mapper.h | 2 +- kernel/power/swap.c | 14 +++---- kernel/trace/blktrace.c | 4 +- mm/page_io.c | 4 +- 106 files changed, 625 insertions(+), 603 deletions(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5384713d48bc..17b9740e138b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -221,7 +221,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, * @bio: bio to generate/verify integrity metadata for * @proc_fn: Pointer to the relevant processing function */ -static int bio_integrity_process(struct bio *bio, +static blk_status_t bio_integrity_process(struct bio *bio, integrity_processing_fn *proc_fn) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); @@ -229,7 +229,7 @@ static int bio_integrity_process(struct bio *bio, struct bvec_iter bviter; struct bio_vec bv; struct bio_integrity_payload *bip = bio_integrity(bio); - unsigned int ret = 0; + blk_status_t ret = BLK_STS_OK; void *prot_buf = page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; @@ -366,7 +366,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio *bio = bip->bip_bio; struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn); + bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn); /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; @@ -395,7 +395,7 @@ void bio_integrity_endio(struct bio *bio) * integrity metadata. Restore original bio end_io handler * and run it. */ - if (bio->bi_error) { + if (bio->bi_status) { bio->bi_end_io = bip->bip_end_io; bio_endio(bio); diff --git a/block/bio.c b/block/bio.c index 888e7801c638..7a5c8ed27f42 100644 --- a/block/bio.c +++ b/block/bio.c @@ -309,8 +309,8 @@ static struct bio *__bio_chain_endio(struct bio *bio) { struct bio *parent = bio->bi_private; - if (!parent->bi_error) - parent->bi_error = bio->bi_error; + if (!parent->bi_status) + parent->bi_status = bio->bi_status; bio_put(bio); return parent; } @@ -918,7 +918,7 @@ static void submit_bio_wait_endio(struct bio *bio) { struct submit_bio_ret *ret = bio->bi_private; - ret->error = bio->bi_error; + ret->error = blk_status_to_errno(bio->bi_status); complete(&ret->event); } @@ -1818,7 +1818,7 @@ again: if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), - bio, bio->bi_error); + bio, bio->bi_status); bio_clear_flag(bio, BIO_TRACE_COMPLETION); } diff --git a/block/blk-core.c b/block/blk-core.c index e942a9f814c7..3d84820ace9e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -144,6 +144,9 @@ static const struct { [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, + /* device mapper special case, should not leak out: */ + [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, + /* everything else not covered above: */ [BLK_STS_IOERR] = { -EIO, "I/O" }, }; @@ -188,7 +191,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, blk_status_t error) { if (error) - bio->bi_error = blk_status_to_errno(error); + bio->bi_status = error; if (unlikely(rq->rq_flags & RQF_QUIET)) bio_set_flag(bio, BIO_QUIET); @@ -1717,7 +1720,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio, q->bio_split); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_error = 
-EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); return BLK_QC_T_NONE; } @@ -1775,7 +1778,10 @@ get_rq: req = get_request(q, bio->bi_opf, bio, GFP_NOIO); if (IS_ERR(req)) { __wbt_done(q->rq_wb, wb_acct); - bio->bi_error = PTR_ERR(req); + if (PTR_ERR(req) == -ENOMEM) + bio->bi_status = BLK_STS_RESOURCE; + else + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); goto out_unlock; } @@ -1930,7 +1936,7 @@ generic_make_request_checks(struct bio *bio) { struct request_queue *q; int nr_sectors = bio_sectors(bio); - int err = -EIO; + blk_status_t status = BLK_STS_IOERR; char b[BDEVNAME_SIZE]; struct hd_struct *part; @@ -1973,7 +1979,7 @@ generic_make_request_checks(struct bio *bio) !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); if (!nr_sectors) { - err = 0; + status = BLK_STS_OK; goto end_io; } } @@ -2025,9 +2031,9 @@ generic_make_request_checks(struct bio *bio) return true; not_supported: - err = -EOPNOTSUPP; + status = BLK_STS_NOTSUPP; end_io: - bio->bi_error = err; + bio->bi_status = status; bio_endio(bio); return false; } diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 0f891a9aff4d..feb30570eaf5 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = { .sysfs_ops = &integrity_ops, }; -static int blk_integrity_nop_fn(struct blk_integrity_iter *iter) +static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) { - return 0; + return BLK_STS_OK; } static const struct blk_integrity_profile nop_profile = { diff --git a/block/bounce.c b/block/bounce.c index 1cb5dd3a5da1..e4703181d97f 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -143,7 +143,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) mempool_free(bvec->bv_page, pool); } - bio_orig->bi_error = bio->bi_error; + bio_orig->bi_status = bio->bi_status; bio_endio(bio_orig); bio_put(bio); } @@ -163,7 +163,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool) { struct bio *bio_orig = bio->bi_private; - if (!bio->bi_error) + if (!bio->bi_status) copy_to_high_bio_irq(bio_orig, bio); bounce_end_io(bio, pool); diff --git a/block/t10-pi.c b/block/t10-pi.c index 680c6d636298..350b3cbcf9e5 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len) * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref * tag. 
*/ -static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn, - unsigned int type) +static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter, + csum_fn *fn, unsigned int type) { unsigned int i; @@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn, iter->seed++; } - return 0; + return BLK_STS_OK; } -static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn, - unsigned int type) +static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, + csum_fn *fn, unsigned int type) { unsigned int i; @@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn, "(rcvd %04x, want %04x)\n", iter->disk_name, (unsigned long long)iter->seed, be16_to_cpu(pi->guard_tag), be16_to_cpu(csum)); - return -EILSEQ; + return BLK_STS_PROTECTION; } next: @@ -117,45 +117,45 @@ next: iter->seed++; } - return 0; + return BLK_STS_OK; } -static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_crc_fn, 1); } -static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_ip_fn, 1); } -static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_crc_fn, 1); } -static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_ip_fn, 1); } -static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_crc_fn, 3); } -static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) { return t10_pi_generate(iter, t10_pi_ip_fn, 3); } -static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_crc_fn, 3); } -static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) +static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) { return t10_pi_verify(iter, t10_pi_ip_fn, 3); } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 5bf0c9d21fc1..dc43254e05a4 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1070,7 +1070,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) d->ip.rq = NULL; do { bio = rq->bio; - bok = !fastfail && !bio->bi_error; + bok = !fastfail && !bio->bi_status; } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size)); /* cf. 
http://lkml.org/lkml/2006/10/31/28 */ @@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f) ahout->cmdstat, ahin->cmdstat, d->aoemajor, d->aoeminor); noskb: if (buf) - buf->bio->bi_error = -EIO; + buf->bio->bi_status = BLK_STS_IOERR; goto out; } @@ -1144,7 +1144,7 @@ noskb: if (buf) "aoe: runt data size in read from", (long) d->aoemajor, d->aoeminor, skb->len, n); - buf->bio->bi_error = -EIO; + buf->bio->bi_status = BLK_STS_IOERR; break; } if (n > f->iter.bi_size) { @@ -1152,7 +1152,7 @@ noskb: if (buf) "aoe: too-large data size in read from", (long) d->aoemajor, d->aoeminor, n, f->iter.bi_size); - buf->bio->bi_error = -EIO; + buf->bio->bi_status = BLK_STS_IOERR; break; } bvcpy(skb, f->buf->bio, f->iter, n); @@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf) if (buf == NULL) return; buf->iter.bi_size = 0; - buf->bio->bi_error = -EIO; + buf->bio->bi_status = BLK_STS_IOERR; if (buf->nframesout == 0) aoe_end_buf(d, buf); } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index ffd1947500c6..b28fefb90391 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d) if (rq == NULL) return; while ((bio = d->ip.nxbio)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; d->ip.nxbio = bio->bi_next; n = (unsigned long) rq->special; rq->special = (void *) --n; diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 8d7bcfa49c12..e02c45cd3c5a 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, else submit_bio(bio); wait_until_done_or_force_detached(device, bdev, &device->md_io.done); - if (!bio->bi_error) + if (!bio->bi_status) err = device->md_io.error; out: diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index a804a4107fbc..809fd245c3dc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio) !bm_test_page_unchanged(b->bm_pages[idx])) drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx); - if (bio->bi_error) { + if (bio->bi_status) { /* ctx error will hold the completed-last non-zero error code, * in case error codes differ. */ - ctx->error = bio->bi_error; + ctx->error = blk_status_to_errno(bio->bi_status); bm_set_page_io_err(b->bm_pages[idx]); /* Not identical to on disk version of it. * Is BM_PAGE_IO_ERROR enough? 
*/ if (__ratelimit(&drbd_ratelimit_state)) drbd_err(device, "IO ERROR %d on bitmap page idx %u\n", - bio->bi_error, idx); + bio->bi_status, idx); } else { bm_clear_page_io_err(b->bm_pages[idx]); dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d5da45bb03a6..76761b4ca13e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1627,7 +1627,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device, __release(local); if (!bio->bi_bdev) { drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); - bio->bi_error = -ENODEV; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); return; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1b0a2be24f39..c7e95e6380fb 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio) struct drbd_device *device = octx->device; struct issue_flush_context *ctx = octx->ctx; - if (bio->bi_error) { - ctx->error = bio->bi_error; - drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error); + if (bio->bi_status) { + ctx->error = blk_status_to_errno(bio->bi_status); + drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status); } kfree(octx); bio_put(bio); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 656624314f0d..fca6b9914948 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection) void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { - m->bio->bi_error = m->error; + m->bio->bi_status = errno_to_blk_status(m->error); bio_endio(m->bio); dec_ap_bio(device); } @@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req) if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, GFP_NOIO, 0)) - req->private_bio->bi_error = -EIO; + req->private_bio->bi_status = BLK_STS_IOERR; bio_endio(req->private_bio); } @@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long /* only pass the error to the upper layers. * if user cannot handle io errors, that's not our business. */ drbd_err(device, "could not kmalloc() req\n"); - bio->bi_error = -ENOMEM; + bio->bi_status = BLK_STS_RESOURCE; bio_endio(bio); return ERR_PTR(-ENOMEM); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1afcb4e02d8d..1d8726a8df34 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio) struct drbd_device *device; device = bio->bi_private; - device->md_io.error = bio->bi_error; + device->md_io.error = blk_status_to_errno(bio->bi_status); /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able * to timeout on the lower level device, and eventually detach from it. @@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio) bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || bio_op(bio) == REQ_OP_DISCARD; - if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) + if (bio->bi_status && __ratelimit(&drbd_ratelimit_state)) drbd_warn(device, "%s: error=%d s=%llus\n", is_write ? (is_discard ? 
"discard" : "write") - : "read", bio->bi_error, + : "read", bio->bi_status, (unsigned long long)peer_req->i.sector); - if (bio->bi_error) + if (bio->bi_status) set_bit(__EE_WAS_ERROR, &peer_req->flags); bio_put(bio); /* no need for the bio anymore */ @@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio) if (__ratelimit(&drbd_ratelimit_state)) drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); - if (!bio->bi_error) + if (!bio->bi_status) drbd_panic_after_delayed_completion_of_aborted_request(device); } /* to avoid recursion in __req_mod */ - if (unlikely(bio->bi_error)) { + if (unlikely(bio->bi_status)) { switch (bio_op(bio)) { case REQ_OP_WRITE_ZEROES: case REQ_OP_DISCARD: - if (bio->bi_error == -EOPNOTSUPP) + if (bio->bi_status == BLK_STS_NOTSUPP) what = DISCARD_COMPLETED_NOTSUPP; else what = DISCARD_COMPLETED_WITH_ERROR; @@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio) } bio_put(req->private_bio); - req->private_bio = ERR_PTR(bio->bi_error); + req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); /* not req_mod(), we need irqsave here! */ spin_lock_irqsave(&device->resource->req_lock, flags); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index cc75a5176057..9e3cb32e365d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio) struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private; int drive = cbdata->drive; - if (bio->bi_error) { + if (bio->bi_status) { pr_info("floppy: error %d while reading block 0\n", - bio->bi_error); + bio->bi_status); set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); } complete(&cbdata->complete); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 42e3c880a8a5..e8a381161db6 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio) pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", bio, (unsigned long long)pkt->sector, - (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error); + (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status); - if (bio->bi_error) + if (bio->bi_status) atomic_inc(&pkt->io_errors); if (atomic_dec_and_test(&pkt->io_wait)) { atomic_inc(&pkt->run_sm); @@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio) struct pktcdvd_device *pd = pkt->pd; BUG_ON(!pd); - pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error); + pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status); pd->stats.pkt_ended++; @@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) pkt_queue_bio(pd, pkt->w_bio); } -static void pkt_finish_packet(struct packet_data *pkt, int error) +static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status) { struct bio *bio; - if (error) + if (status) pkt->cache_valid = 0; /* Finish all bios corresponding to this packet */ while ((bio = bio_list_pop(&pkt->orig_bios))) { - bio->bi_error = error; + bio->bi_status = status; bio_endio(bio); } } @@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data if (atomic_read(&pkt->io_wait) > 0) return; - if (!pkt->w_bio->bi_error) { + if (!pkt->w_bio->bi_status) { pkt_set_state(pkt, PACKET_FINISHED_STATE); } else { pkt_set_state(pkt, PACKET_RECOVERY_STATE); @@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data break; case PACKET_FINISHED_STATE: - 
pkt_finish_packet(pkt, pkt->w_bio->bi_error); + pkt_finish_packet(pkt, pkt->w_bio->bi_status); return; default: @@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio) struct packet_stacked_data *psd = bio->bi_private; struct pktcdvd_device *pd = psd->pd; - psd->bio->bi_error = bio->bi_error; + psd->bio->bi_status = bio->bi_status; bio_put(bio); bio_endio(psd->bio); mempool_free(psd, psd_pool); diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 456b4fe21559..6fa2b8197013 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev) kfree(priv->cache.tags); } -static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from, +static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from, (unsigned int)from, len); if (from >= priv->size) - return -EIO; + return BLK_STS_IOERR; if (len > priv->size - from) len = priv->size - from; @@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from, return 0; } -static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to, +static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); unsigned int cached, count; if (to >= priv->size) - return -EIO; + return BLK_STS_IOERR; if (len > priv->size - to) len = priv->size - to; @@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev, int write = bio_data_dir(bio) == WRITE; const char *op = write ? 
"write" : "read"; loff_t offset = bio->bi_iter.bi_sector << 9; - int error = 0; + blk_status_t error = 0; struct bio_vec bvec; struct bvec_iter iter; struct bio *next; @@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev, if (retlen != len) { dev_err(&dev->core, "Short %s\n", op); - error = -EIO; + error = BLK_STS_IOERR; goto out; } @@ -593,7 +593,7 @@ out: next = bio_list_peek(&priv->list); spin_unlock_irq(&priv->lock); - bio->bi_error = error; + bio->bi_status = error; bio_endio(bio); return next; } diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 9c566364ac9c..0b0a0a902355 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -149,7 +149,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) { struct rsxx_cardinfo *card = q->queuedata; struct rsxx_bio_meta *bio_meta; - int st = -EINVAL; + blk_status_t st = BLK_STS_IOERR; blk_queue_split(q, &bio, q->bio_split); @@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) if (bio_end_sector(bio) > get_capacity(card->gendisk)) goto req_err; - if (unlikely(card->halt)) { - st = -EFAULT; + if (unlikely(card->halt)) goto req_err; - } - if (unlikely(card->dma_fault)) { - st = (-EFAULT); + if (unlikely(card->dma_fault)) goto req_err; - } if (bio->bi_iter.bi_size == 0) { dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); @@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); if (!bio_meta) { - st = -ENOMEM; + st = BLK_STS_RESOURCE; goto req_err; } @@ -205,7 +201,7 @@ queue_err: kmem_cache_free(bio_meta_pool, bio_meta); req_err: if (st) - bio->bi_error = st; + bio->bi_status = st; bio_endio(bio); return BLK_QC_T_NONE; } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 5a20385f87d0..6a1b2177951c 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work) mutex_unlock(&ctrl->work_lock); } -static int rsxx_queue_discard(struct rsxx_cardinfo *card, +static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card, struct list_head *q, unsigned int laddr, rsxx_dma_cb cb, @@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card, dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); if (!dma) - return -ENOMEM; + return BLK_STS_RESOURCE; dma->cmd = HW_CMD_BLK_DISCARD; dma->laddr = laddr; @@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card, return 0; } -static int rsxx_queue_dma(struct rsxx_cardinfo *card, +static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card, struct list_head *q, int dir, unsigned int dma_off, @@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card, dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); if (!dma) - return -ENOMEM; + return BLK_STS_RESOURCE; dma->cmd = dir ? 
HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; dma->laddr = laddr; @@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card, return 0; } -int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, +blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card, struct bio *bio, atomic_t *n_dmas, rsxx_dma_cb cb, @@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, unsigned int dma_len; int dma_cnt[RSXX_MAX_TARGETS]; int tgt; - int st; + blk_status_t st; int i; addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */ @@ -769,7 +769,6 @@ bvec_err: for (i = 0; i < card->n_targets; i++) rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i], FREE_DMA); - return st; } diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 6bbc64d0f690..277f27e673a2 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl); void rsxx_dma_cleanup(void); void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); int rsxx_dma_configure(struct rsxx_cardinfo *card); -int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, +blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card, struct bio *bio, atomic_t *n_dmas, rsxx_dma_cb cb, diff --git a/drivers/block/umem.c b/drivers/block/umem.c index c141cc3be22b..4b3c947697b1 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -454,7 +454,7 @@ static void process_page(unsigned long data) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (control & DMASCR_HARD_ERROR) { /* error */ - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; dev_printk(KERN_WARNING, &card->dev->dev, "I/O error on sector %d/%d\n", le32_to_cpu(desc->local_addr)>>9, diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 726c32e35db9..746bd8c8c09a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1069,20 +1069,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring) atomic_set(&blkif->drain, 0); } -/* - * Completion callback on the bio's. Called as bh->b_end_io() - */ - -static void __end_block_io_op(struct pending_req *pending_req, int error) +static void __end_block_io_op(struct pending_req *pending_req, + blk_status_t error) { /* An error fails the entire request. 
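The ps3vram and rsxx hunks above push the conversion one level down: driver-internal helpers that used to return 0 or a negative errno now return blk_status_t directly, so the result can be stored into bi_status with no translation, with BLK_STS_RESOURCE standing in for -ENOMEM and BLK_STS_IOERR for -EIO. A sketch of that shape under hypothetical example_* names:

#include <linux/blk_types.h>
#include <linux/slab.h>

static blk_status_t example_queue_op(struct kmem_cache *pool, loff_t off,
				     loff_t dev_size)
{
	void *cmd;

	if (off >= dev_size)		/* was: return -EIO */
		return BLK_STS_IOERR;

	cmd = kmem_cache_alloc(pool, GFP_KERNEL);
	if (!cmd)			/* was: return -ENOMEM */
		return BLK_STS_RESOURCE;

	/* ... hand cmd to the hardware queue; freed here only so the
	 * sketch stands alone ... */
	kmem_cache_free(pool, cmd);
	return BLK_STS_OK;
}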
*/ - if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && - (error == -EOPNOTSUPP)) { + if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE && + error == BLK_STS_NOTSUPP) { pr_debug("flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; - } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && - (error == -EOPNOTSUPP)) { + } else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER && + error == BLK_STS_NOTSUPP) { pr_debug("write barrier op failed, not supported\n"); xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; @@ -1106,7 +1103,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) */ static void end_block_io_op(struct bio *bio) { - __end_block_io_op(bio->bi_private, bio->bi_error); + __end_block_io_op(bio->bi_private, bio->bi_status); bio_put(bio); } @@ -1423,7 +1420,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, for (i = 0; i < nbio; i++) bio_put(biolist[i]); atomic_set(&pending_req->pendcnt, 1); - __end_block_io_op(pending_req, -EINVAL); + __end_block_io_op(pending_req, BLK_STS_RESOURCE); msleep(1); /* back off a bit */ return -EIO; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2f468cf86dcf..e3be666c2776 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2006,7 +2006,7 @@ static void split_bio_end(struct bio *bio) if (atomic_dec_and_test(&split_bio->pending)) { split_bio->bio->bi_phys_segments = 0; - split_bio->bio->bi_error = bio->bi_error; + split_bio->bio->bi_status = bio->bi_status; bio_endio(split_bio->bio); kfree(split_bio); } diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 5e44768ccffa..4e0de995cd90 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -296,8 +296,8 @@ void pblk_flush_writer(struct pblk *pblk) pr_err("pblk: tear down bio failed\n"); } - if (bio->bi_error) - pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error); + if (bio->bi_status) + pr_err("pblk: flush sync write failed (%u)\n", bio->bi_status); bio_put(bio); } diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 4a12f14d78c6..762c0b73cb67 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -114,7 +114,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd) pblk_log_read_err(pblk, rqd); #ifdef CONFIG_NVM_DEBUG else - WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n"); + WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n"); #endif if (rqd->nr_ppas > 1) @@ -123,7 +123,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd) bio_put(bio); if (r_ctx->orig_bio) { #ifdef CONFIG_NVM_DEBUG - WARN_ONCE(r_ctx->orig_bio->bi_error, + WARN_ONCE(r_ctx->orig_bio->bi_status, "pblk: corrupted read bio\n"); #endif bio_endio(r_ctx->orig_bio); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index aef6fd7c4a0c..79b90d8dbcb3 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -186,7 +186,7 @@ static void pblk_end_io_write(struct nvm_rq *rqd) } #ifdef CONFIG_NVM_DEBUG else - WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n"); + WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); #endif pblk_complete_write(pblk, rqd, c_ctx); diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index cf0e28a0ff61..8d3b53bb3307 100644 --- 
a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio) { struct completion *waiting = bio->bi_private; - if (bio->bi_error) - pr_err("nvm: gc request failed (%u).\n", bio->bi_error); + if (bio->bi_status) + pr_err("nvm: gc request failed (%u).\n", bio->bi_status); complete(waiting); } @@ -359,7 +359,7 @@ try: goto finished; } wait_for_completion_io(&wait); - if (bio->bi_error) { + if (bio->bi_status) { rrpc_inflight_laddr_release(rrpc, rqd); goto finished; } @@ -385,7 +385,7 @@ try: wait_for_completion_io(&wait); rrpc_inflight_laddr_release(rrpc, rqd); - if (bio->bi_error) + if (bio->bi_status) goto finished; bio_reset(bio); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index c3ea03c9a1a8..dee542fff68e 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c) /* Forward declarations */ -void bch_count_io_errors(struct cache *, int, const char *); +void bch_count_io_errors(struct cache *, blk_status_t, const char *); void bch_bbio_count_io_errors(struct cache_set *, struct bio *, - int, const char *); -void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); + blk_status_t, const char *); +void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t, + const char *); void bch_bbio_free(struct bio *, struct cache_set *); struct bio *bch_bbio_alloc(struct cache_set *); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 450d0e848ae4..866dcf78ff8e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b) bch_submit_bbio(bio, b->c, &b->key, 0); closure_sync(&cl); - if (bio->bi_error) + if (bio->bi_status) set_btree_node_io_error(b); bch_bbio_free(bio, b->c); @@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio) struct closure *cl = bio->bi_private; struct btree *b = container_of(cl, struct btree, io); - if (bio->bi_error) + if (bio->bi_status) set_btree_node_io_error(b); - bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree"); + bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree"); closure_put(cl); } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index db45a88c0ce9..6a9b85095e7b 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, /* IO errors */ -void bch_count_io_errors(struct cache *ca, int error, const char *m) +void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m) { /* * The halflife of an error is: @@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m) } void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, - int error, const char *m) + blk_status_t error, const char *m) { struct bbio *b = container_of(bio, struct bbio, bio); struct cache *ca = PTR_CACHE(c, &b->key, 0); @@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, } void bch_bbio_endio(struct cache_set *c, struct bio *bio, - int error, const char *m) + blk_status_t error, const char *m) { struct closure *cl = bio->bi_private; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 1198e53d5670..0352d05e495c 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio) 
{ struct journal_write *w = bio->bi_private; - cache_set_err_on(bio->bi_error, w->c, "journal io error"); + cache_set_err_on(bio->bi_status, w->c, "journal io error"); closure_put(&w->c->journal.io); } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 13b8a907006d..f633b30c962e 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio) struct moving_io *io = container_of(bio->bi_private, struct moving_io, cl); - if (bio->bi_error) - io->op.error = bio->bi_error; + if (bio->bi_status) + io->op.status = bio->bi_status; else if (!KEY_DIRTY(&b->key) && ptr_stale(io->op.c, &b->key, 0)) { - io->op.error = -EINTR; + io->op.status = BLK_STS_IOERR; } - bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move"); + bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move"); } static void moving_init(struct moving_io *io) @@ -92,7 +92,7 @@ static void write_moving(struct closure *cl) struct moving_io *io = container_of(cl, struct moving_io, cl); struct data_insert_op *op = &io->op; - if (!op->error) { + if (!op->status) { moving_init(io); io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 709c9cc34369..019b3df9f1c6 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl) if (ret == -ESRCH) { op->replace_collision = true; } else if (ret) { - op->error = -ENOMEM; + op->status = BLK_STS_RESOURCE; op->insert_data_done = true; } @@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio) struct closure *cl = bio->bi_private; struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); - if (bio->bi_error) { + if (bio->bi_status) { /* TODO: We could try to recover from this. */ if (op->writeback) - op->error = bio->bi_error; + op->status = bio->bi_status; else if (!op->replace) set_closure_fn(cl, bch_data_insert_error, op->wq); else set_closure_fn(cl, NULL, NULL); } - bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache"); + bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache"); } static void bch_data_insert_start(struct closure *cl) @@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio) * from the backing device. 
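In the bcache hunks around this point the per-operation error field changes type along with the bios (data_insert_op's short error becomes blk_status_t status in request.h just below), and the endio handlers simply propagate the clone's bi_status into it, as read_moving_endio() and bch_data_insert_endio() do above. Reduced to its core, with hypothetical example_* names:

#include <linux/bio.h>
#include <linux/blk_types.h>

struct example_op {
	blk_status_t status;		/* zero until a clone fails */
};

static void example_clone_endio(struct bio *bio)
{
	struct example_op *op = bio->bi_private;

	/* propagate the clone's completion status to the parent op */
	if (bio->bi_status)
		op->status = bio->bi_status;

	bio_put(bio);
}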
*/ - if (bio->bi_error) - s->iop.error = bio->bi_error; + if (bio->bi_status) + s->iop.status = bio->bi_status; else if (!KEY_DIRTY(&b->key) && ptr_stale(s->iop.c, &b->key, 0)) { atomic_long_inc(&s->iop.c->cache_read_races); - s->iop.error = -EINTR; + s->iop.status = BLK_STS_IOERR; } - bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache"); + bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache"); } /* @@ -593,9 +593,9 @@ static void request_endio(struct bio *bio) { struct closure *cl = bio->bi_private; - if (bio->bi_error) { + if (bio->bi_status) { struct search *s = container_of(cl, struct search, cl); - s->iop.error = bio->bi_error; + s->iop.status = bio->bi_status; /* Only cache read errors are recoverable */ s->recoverable = false; } @@ -611,7 +611,7 @@ static void bio_complete(struct search *s) &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); - s->orig_bio->bi_error = s->iop.error; + s->orig_bio->bi_status = s->iop.status; bio_endio(s->orig_bio); s->orig_bio = NULL; } @@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio, s->iop.inode = d->id; s->iop.write_point = hash_long((unsigned long) current, 16); s->iop.write_prio = 0; - s->iop.error = 0; + s->iop.status = 0; s->iop.flags = 0; s->iop.flush_journal = op_is_flush(bio->bi_opf); s->iop.wq = bcache_wq; @@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl) /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); - s->iop.error = 0; + s->iop.status = 0; do_bio_hook(s, s->orig_bio); /* XXX: invalidate cache */ @@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl) !s->cache_miss, s->iop.bypass); trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); - if (s->iop.error) + if (s->iop.status) continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); else if (s->iop.bio || verify(dc, &s->bio.bio)) continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq); diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 1ff36875c2b3..7689176951ce 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -10,7 +10,7 @@ struct data_insert_op { unsigned inode; uint16_t write_point; uint16_t write_prio; - short error; + blk_status_t status; union { uint16_t flags; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e57353e39168..fbc4f5412dec 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio) { struct cache *ca = bio->bi_private; - bch_count_io_errors(ca, bio->bi_error, "writing superblock"); + bch_count_io_errors(ca, bio->bi_status, "writing superblock"); closure_put(&ca->set->sb_write); } @@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio) struct closure *cl = bio->bi_private; struct cache_set *c = container_of(cl, struct cache_set, uuid_write); - cache_set_err_on(bio->bi_error, c, "accessing uuids"); + cache_set_err_on(bio->bi_status, c, "accessing uuids"); bch_bbio_free(bio, c); closure_put(cl); } @@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio) { struct cache *ca = bio->bi_private; - cache_set_err_on(bio->bi_error, ca->set, "accessing priorities"); + cache_set_err_on(bio->bi_status, ca->set, "accessing priorities"); bch_bbio_free(bio, ca->set); closure_put(&ca->prio); } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 6ac2e48b9235..42c66e76f05e 100644 --- a/drivers/md/bcache/writeback.c +++ 
b/drivers/md/bcache/writeback.c @@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio) struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; - if (bio->bi_error) + if (bio->bi_status) SET_KEY_DIRTY(&w->key, false); closure_put(&io->cl); @@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio) struct dirty_io *io = w->private; bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), - bio->bi_error, "reading dirty data from cache"); + bio->bi_status, "reading dirty data from cache"); dirty_endio(bio); } diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c index ae7da2c30a57..82d27384d31f 100644 --- a/drivers/md/dm-bio-prison-v1.c +++ b/drivers/md/dm-bio-prison-v1.c @@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); void dm_cell_error(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, int error) + struct dm_bio_prison_cell *cell, blk_status_t error) { struct bio_list bios; struct bio *bio; @@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison, dm_cell_release(prison, cell, &bios); while ((bio = bio_list_pop(&bios))) { - bio->bi_error = error; + bio->bi_status = error; bio_endio(bio); } } diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h index cddd4ac07e2c..cec52ac5e1ae 100644 --- a/drivers/md/dm-bio-prison-v1.h +++ b/drivers/md/dm-bio-prison-v1.h @@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, struct dm_bio_prison_cell *cell, struct bio_list *inmates); void dm_cell_error(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, int error); + struct dm_bio_prison_cell *cell, blk_status_t error); /* * Visits the cell and then releases. Guarantees no new inmates are diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cd8139593ccd..0902d2fd1743 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -145,8 +145,8 @@ struct dm_buffer { enum data_mode data_mode; unsigned char list_mode; /* LIST_* */ unsigned hold_count; - int read_error; - int write_error; + blk_status_t read_error; + blk_status_t write_error; unsigned long state; unsigned long last_accessed; struct dm_bufio_client *c; @@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context) { struct dm_buffer *b = context; - b->bio.bi_error = error ? -EIO : 0; + b->bio.bi_status = error ? 
BLK_STS_IOERR : 0; b->bio.bi_end_io(&b->bio); } @@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, r = dm_io(&io_req, 1, ®ion, NULL); if (r) { - b->bio.bi_error = r; + b->bio.bi_status = errno_to_blk_status(r); end_io(&b->bio); } } @@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, static void inline_endio(struct bio *bio) { bio_end_io_t *end_fn = bio->bi_private; - int error = bio->bi_error; + blk_status_t status = bio->bi_status; /* * Reset the bio to free any attached resources @@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio) */ bio_reset(bio); - bio->bi_error = error; + bio->bi_status = status; end_fn(bio); } @@ -685,11 +685,12 @@ static void write_endio(struct bio *bio) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - b->write_error = bio->bi_error; - if (unlikely(bio->bi_error)) { + b->write_error = bio->bi_status; + if (unlikely(bio->bi_status)) { struct dm_bufio_client *c = b->c; - int error = bio->bi_error; - (void)cmpxchg(&c->async_write_error, 0, error); + + (void)cmpxchg(&c->async_write_error, 0, + blk_status_to_errno(bio->bi_status)); } BUG_ON(!test_bit(B_WRITING, &b->state)); @@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - b->read_error = bio->bi_error; + b->read_error = bio->bi_status; BUG_ON(!test_bit(B_READING, &b->state)); @@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); if (b->read_error) { - int error = b->read_error; + int error = blk_status_to_errno(b->read_error); dm_bufio_release(b); @@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async); */ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) { - int a, f; + blk_status_t a; + int f; unsigned long buffers_processed = 0; struct dm_buffer *b, *tmp; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index c48612e6d525..c5ea03fc7ee1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len) */ struct continuation { struct work_struct ws; - int input; + blk_status_t input; }; static inline void init_continuation(struct continuation *k, @@ -145,7 +145,7 @@ struct batcher { /* * The operation that everyone is waiting for. 
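dm-bufio's use_dmio() above shows the translation rule at a call boundary: dm_io() still returns an errno, so its result is converted with errno_to_blk_status() at the single point where it feeds a bio. The dm-cache batcher below applies the same rule in its commit_op callback. The idiom, sketched with a hypothetical errno-space callee:

#include <linux/blk_types.h>

/* hypothetical metadata commit that still speaks errno */
static int example_metadata_commit(void *context)
{
	return 0;			/* 0 or -errno */
}

static blk_status_t example_commit_op(void *context)
{
	/* one explicit crossing from errno space into blk_status_t space */
	return errno_to_blk_status(example_metadata_commit(context));
}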
*/ - int (*commit_op)(void *context); + blk_status_t (*commit_op)(void *context); void *commit_context; /* @@ -171,8 +171,7 @@ struct batcher { static void __commit(struct work_struct *_ws) { struct batcher *b = container_of(_ws, struct batcher, commit_work); - - int r; + blk_status_t r; unsigned long flags; struct list_head work_items; struct work_struct *ws, *tmp; @@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws) while ((bio = bio_list_pop(&bios))) { if (r) { - bio->bi_error = r; + bio->bi_status = r; bio_endio(bio); } else b->issue_op(bio, b->issue_context); @@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws) } static void batcher_init(struct batcher *b, - int (*commit_op)(void *), + blk_status_t (*commit_op)(void *), void *commit_context, void (*issue_op)(struct bio *bio, void *), void *issue_context, @@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio) dm_unhook_bio(&pb->hook_info, bio); - if (bio->bi_error) { + if (bio->bi_status) { bio_endio(bio); return; } @@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k); if (read_err || write_err) - mg->k.input = -EIO; + mg->k.input = BLK_STS_IOERR; queue_continuation(mg->cache->wq, &mg->k); } @@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio) dm_unhook_bio(&pb->hook_info, bio); - if (bio->bi_error) - mg->k.input = bio->bi_error; + if (bio->bi_status) + mg->k.input = bio->bi_status; queue_continuation(mg->cache->wq, &mg->k); } @@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success) if (mg->overwrite_bio) { if (success) force_set_dirty(cache, cblock); + else if (mg->k.input) + mg->overwrite_bio->bi_status = mg->k.input; else - mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO); + mg->overwrite_bio->bi_status = BLK_STS_IOERR; bio_endio(mg->overwrite_bio); } else { if (success) @@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws) r = copy(mg, is_policy_promote); if (r) { DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); - mg->k.input = -EIO; + mg->k.input = BLK_STS_IOERR; mg_complete(mg, false); } } @@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown) /* * Used by the batcher. 
*/ -static int commit_op(void *context) +static blk_status_t commit_op(void *context) { struct cache *cache = context; if (dm_cache_changed_this_transaction(cache->cmd)) - return commit(cache, false); + return errno_to_blk_status(commit(cache, false)); return 0; } @@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache) bio_list_init(&cache->deferred_bios); while ((bio = bio_list_pop(&bios))) { - bio->bi_error = DM_ENDIO_REQUEUE; + bio->bi_status = BLK_STS_DM_REQUEUE; bio_endio(bio); } } @@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return r; } -static int cache_end_io(struct dm_target *ti, struct bio *bio, int *error) +static int cache_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error) { struct cache *cache = ti->private; unsigned long flags; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f4b51809db21..586cef085c6a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -71,7 +71,7 @@ struct dm_crypt_io { struct convert_context ctx; atomic_t io_pending; - int error; + blk_status_t error; sector_t sector; struct rb_node rb_node; @@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_ /* * Encrypt / decrypt data from one bio to another one (can be the same one) */ -static int crypt_convert(struct crypt_config *cc, +static blk_status_t crypt_convert(struct crypt_config *cc, struct convert_context *ctx) { unsigned int tag_offset = 0; @@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc, */ case -EBADMSG: atomic_dec(&ctx->cc_pending); - return -EILSEQ; + return BLK_STS_PROTECTION; /* * There was an error while processing the request. */ default: atomic_dec(&ctx->cc_pending); - return -EIO; + return BLK_STS_IOERR; } } @@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; - int error = io->error; + blk_status_t error = io->error; if (!atomic_dec_and_test(&io->io_pending)) return; @@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) else kfree(io->integrity_metadata); - base_bio->bi_error = error; + base_bio->bi_status = error; bio_endio(base_bio); } @@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned rw = bio_data_dir(clone); - int error; + blk_status_t error; /* * free the processed pages @@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone) if (rw == WRITE) crypt_free_buffer_pages(cc, clone); - error = clone->bi_error; + error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work) crypt_inc_pending(io); if (kcryptd_io_read(io, GFP_NOIO)) - io->error = -ENOMEM; + io->error = BLK_STS_RESOURCE; crypt_dec_pending(io); } @@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) sector_t sector; struct rb_node **rbp, *parent; - if (unlikely(io->error < 0)) { + if (unlikely(io->error)) { crypt_free_buffer_pages(cc, clone); bio_put(clone); crypt_dec_pending(io); @@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) struct bio *clone; int crypt_finished; sector_t sector = io->sector; - int r; + blk_status_t r; /* * Prevent io from disappearing until this function completes. 
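crypt_convert() above shows that subsystem errnos are classified, not copied: the crypto API's -EBADMSG (failed AEAD authentication) maps to BLK_STS_PROTECTION, other crypto failures to BLK_STS_IOERR, and the -ENOMEM path in kcryptd_io_read_work() to BLK_STS_RESOURCE. That classification, condensed into one hypothetical helper:

#include <linux/blk_types.h>
#include <linux/errno.h>

static blk_status_t example_classify_crypt_err(int err)
{
	switch (err) {
	case 0:
		return BLK_STS_OK;
	case -EBADMSG:			/* integrity/authentication failure */
		return BLK_STS_PROTECTION;
	case -ENOMEM:
		return BLK_STS_RESOURCE;
	default:
		return BLK_STS_IOERR;
	}
}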
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); if (unlikely(!clone)) { - io->error = -EIO; + io->error = BLK_STS_IOERR; goto dec; } @@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, &io->ctx); - if (r < 0) + if (r) io->error = r; crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); @@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; - int r = 0; + blk_status_t r; crypt_inc_pending(io); @@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) io->sector); r = crypt_convert(cc, &io->ctx); - if (r < 0) + if (r) io->error = r; if (atomic_dec_and_test(&io->ctx.cc_pending)) @@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error == -EBADMSG) { DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu", (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq))); - io->error = -EILSEQ; + io->error = BLK_STS_PROTECTION; } else if (error < 0) - io->error = -EIO; + io->error = BLK_STS_IOERR; crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index c9539917a59b..3d04d5ce19d9 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -358,7 +358,8 @@ map_bio: return DM_MAPIO_REMAPPED; } -static int flakey_end_io(struct dm_target *ti, struct bio *bio, int *error) +static int flakey_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error) { struct flakey_c *fc = ti->private; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); @@ -377,7 +378,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int *error) * Error read during the down_interval if drop_writes * and error_writes were not configured. 
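The flakey_end_io() change above is one instance of an interface change repeated for every bio-based target in this series (log-writes, mpath, mirror, snapshot, stripe and thin below): the target's end_io hook now receives blk_status_t *error instead of int *error, may rewrite the status in place, and returns a DM_ENDIO_* disposition. Sketched for a hypothetical target:

#include <linux/bio.h>
#include <linux/device-mapper.h>

static bool example_should_fail(struct bio *bio)
{
	return false;			/* placeholder policy, cf. dm-flakey */
}

static int example_end_io(struct dm_target *ti, struct bio *bio,
			  blk_status_t *error)
{
	if (*error == BLK_STS_NOTSUPP)	/* optional op: not a device failure */
		return DM_ENDIO_DONE;

	if (example_should_fail(bio))
		*error = BLK_STS_IOERR;	/* rewrite the completion status */

	return DM_ENDIO_DONE;	/* or DM_ENDIO_REQUEUE / DM_ENDIO_INCOMPLETE */
}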
*/ - *error = -EIO; + *error = BLK_STS_IOERR; } } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ee78fb471229..ccc6ef4d00b9 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -246,7 +246,7 @@ struct dm_integrity_io { unsigned metadata_offset; atomic_t in_flight; - int bi_error; + blk_status_t bi_status; struct completion *completion; @@ -1114,8 +1114,8 @@ static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io * static void do_endio(struct dm_integrity_c *ic, struct bio *bio) { int r = dm_integrity_failed(ic); - if (unlikely(r) && !bio->bi_error) - bio->bi_error = r; + if (unlikely(r) && !bio->bi_status) + bio->bi_status = errno_to_blk_status(r); bio_endio(bio); } @@ -1123,7 +1123,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di { struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); - if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic))) + if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic))) submit_flush_bio(ic, dio); else do_endio(ic, bio); @@ -1142,9 +1142,9 @@ static void dec_in_flight(struct dm_integrity_io *dio) bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); - if (unlikely(dio->bi_error) && !bio->bi_error) - bio->bi_error = dio->bi_error; - if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) { + if (unlikely(dio->bi_status) && !bio->bi_status) + bio->bi_status = dio->bi_status; + if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) { dio->range.logical_sector += dio->range.n_sectors; bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); INIT_WORK(&dio->work, integrity_bio_wait); @@ -1318,7 +1318,7 @@ skip_io: dec_in_flight(dio); return; error: - dio->bi_error = r; + dio->bi_status = errno_to_blk_status(r); dec_in_flight(dio); } @@ -1331,7 +1331,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) sector_t area, offset; dio->ic = ic; - dio->bi_error = 0; + dio->bi_status = 0; if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { submit_flush_bio(ic, dio); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 3702e502466d..c8f8f3004085 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -124,7 +124,7 @@ static void complete_io(struct io *io) fn(error_bits, context); } -static void dec_count(struct io *io, unsigned int region, int error) +static void dec_count(struct io *io, unsigned int region, blk_status_t error) { if (error) set_bit(region, &io->error_bits); @@ -137,9 +137,9 @@ static void endio(struct bio *bio) { struct io *io; unsigned region; - int error; + blk_status_t error; - if (bio->bi_error && bio_data_dir(bio) == READ) + if (bio->bi_status && bio_data_dir(bio) == READ) zero_fill_bio(bio); /* @@ -147,7 +147,7 @@ static void endio(struct bio *bio) */ retrieve_io_and_region_from_bio(bio, &io, ®ion); - error = bio->bi_error; + error = bio->bi_status; bio_put(bio); dec_count(io, region, error); @@ -319,7 +319,7 @@ static void do_region(int op, int op_flags, unsigned region, if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES || op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) { - dec_count(io, region, -EOPNOTSUPP); + dec_count(io, region, BLK_STS_NOTSUPP); return; } diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index cc57c7fa1268..a1da0eb58a93 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -150,10 +150,10 @@ 
static void log_end_io(struct bio *bio) { struct log_writes_c *lc = bio->bi_private; - if (bio->bi_error) { + if (bio->bi_status) { unsigned long flags; - DMERR("Error writing log block, error=%d", bio->bi_error); + DMERR("Error writing log block, error=%d", bio->bi_status); spin_lock_irqsave(&lc->blocks_lock, flags); lc->logging_enabled = false; spin_unlock_irqrestore(&lc->blocks_lock, flags); @@ -664,7 +664,8 @@ map_bio: return DM_MAPIO_REMAPPED; } -static int normal_end_io(struct dm_target *ti, struct bio *bio, int *error) +static int normal_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error) { struct log_writes_c *lc = ti->private; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 39262e344ae1..a7d2e0840cc5 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -565,7 +565,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m mpio->pgpath = pgpath; mpio->nr_bytes = nr_bytes; - bio->bi_error = 0; + bio->bi_status = 0; bio->bi_bdev = pgpath->path.dev->bdev; bio->bi_opf |= REQ_FAILFAST_TRANSPORT; @@ -623,10 +623,10 @@ static void process_queued_bios(struct work_struct *work) r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio)); switch (r) { case DM_MAPIO_KILL: - r = -EIO; - /*FALLTHRU*/ + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); case DM_MAPIO_REQUEUE: - bio->bi_error = r; + bio->bi_status = BLK_STS_DM_REQUEUE; bio_endio(bio); break; case DM_MAPIO_REMAPPED: @@ -1510,7 +1510,8 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, return r; } -static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *error) +static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, + blk_status_t *error) { struct multipath *m = ti->private; struct dm_mpath_io *mpio = get_mpio_from_bio(clone); @@ -1518,7 +1519,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *er unsigned long flags; int r = DM_ENDIO_DONE; - if (!*error || noretry_error(errno_to_blk_status(*error))) + if (!*error || noretry_error(*error)) goto done; if (pgpath) @@ -1527,7 +1528,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int *er if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { dm_report_EIO(m); - *error = -EIO; + *error = BLK_STS_IOERR; goto done; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 77bcf50ce75f..0822e4a6f67d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -490,9 +490,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio) * If device is suspended, complete the bio. */ if (dm_noflush_suspending(ms->ti)) - bio->bi_error = DM_ENDIO_REQUEUE; + bio->bi_status = BLK_STS_DM_REQUEUE; else - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); return; @@ -626,7 +626,7 @@ static void write_callback(unsigned long error, void *context) * degrade the array. 
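One hunk above deserves a flag: in dm-mpath's process_queued_bios(), the new DM_MAPIO_KILL arm sets BLK_STS_IOERR and completes the bio but, as rendered here, has no break before the DM_MAPIO_REQUEUE arm (the hunk header's unchanged 10-line count suggests none was added), so a killed bio would be completed a second time with its status overwritten to BLK_STS_DM_REQUEUE. The safe shape, as a sketch:

#include <linux/bio.h>
#include <linux/device-mapper.h>

static void example_finish_unmapped(struct bio *bio, int r)
{
	switch (r) {
	case DM_MAPIO_KILL:
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		break;	/* without this, control falls into the requeue arm */
	case DM_MAPIO_REQUEUE:
		bio->bi_status = BLK_STS_DM_REQUEUE;
		bio_endio(bio);
		break;
	}
}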
*/ if (bio_op(bio) == REQ_OP_DISCARD) { - bio->bi_error = -EOPNOTSUPP; + bio->bi_status = BLK_STS_NOTSUPP; bio_endio(bio); return; } @@ -1236,7 +1236,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -static int mirror_end_io(struct dm_target *ti, struct bio *bio, int *error) +static int mirror_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error) { int rw = bio_data_dir(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; @@ -1255,7 +1256,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int *error) return DM_ENDIO_DONE; } - if (*error == -EOPNOTSUPP) + if (*error == BLK_STS_NOTSUPP) return DM_ENDIO_DONE; if (bio->bi_opf & REQ_RAHEAD) @@ -1277,7 +1278,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int *error) bd = &bio_record->details; dm_bio_restore(bd, bio); - bio->bi_error = 0; + bio->bi_status = 0; queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 63402f8a38de..fafd5326e572 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone) struct dm_rq_target_io *tio = info->tio; struct bio *bio = info->orig; unsigned int nr_bytes = info->orig->bi_iter.bi_size; - blk_status_t error = errno_to_blk_status(clone->bi_error); + blk_status_t error = clone->bi_status; bio_put(clone); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 79a845798e2f..1ba41048b438 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio) { void *callback_data = bio->bi_private; - dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0); + dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0); } static void start_full_bio(struct dm_snap_pending_exception *pe, @@ -1851,7 +1851,8 @@ out_unlock: return r; } -static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int *error) +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error) { struct dm_snapshot *s = ti->private; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 49888bc2c909..11621a0af887 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -375,7 +375,8 @@ static void stripe_status(struct dm_target *ti, status_type_t type, } } -static int stripe_end_io(struct dm_target *ti, struct bio *bio, int *error) +static int stripe_end_io(struct dm_target *ti, struct bio *bio, + blk_status_t *error) { unsigned i; char major_minor[16]; @@ -387,7 +388,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int *error) if (bio->bi_opf & REQ_RAHEAD) return DM_ENDIO_DONE; - if (*error == -EOPNOTSUPP) + if (*error == BLK_STS_NOTSUPP) return DM_ENDIO_DONE; memset(major_minor, 0, sizeof(major_minor)); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 22b1a64c44b7..3490b300cbff 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r) * Even if r is set, there could be sub discards in flight that we * need to wait for. 
*/ - if (r && !op->parent_bio->bi_error) - op->parent_bio->bi_error = r; + if (r && !op->parent_bio->bi_status) + op->parent_bio->bi_status = errno_to_blk_status(r); bio_endio(op->parent_bio); } @@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool, } static void cell_error_with_code(struct pool *pool, - struct dm_bio_prison_cell *cell, int error_code) + struct dm_bio_prison_cell *cell, blk_status_t error_code) { dm_cell_error(pool->prison, cell, error_code); dm_bio_prison_free_cell(pool->prison, cell); } -static int get_pool_io_error_code(struct pool *pool) +static blk_status_t get_pool_io_error_code(struct pool *pool) { - return pool->out_of_data_space ? -ENOSPC : -EIO; + return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR; } static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell) { - int error = get_pool_io_error_code(pool); - - cell_error_with_code(pool, cell, error); + cell_error_with_code(pool, cell, get_pool_io_error_code(pool)); } static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell) @@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell) static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell) { - cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE); + cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE); } /*----------------------------------------------------------------*/ @@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master) bio_list_init(master); } -static void error_bio_list(struct bio_list *bios, int error) +static void error_bio_list(struct bio_list *bios, blk_status_t error) { struct bio *bio; while ((bio = bio_list_pop(bios))) { - bio->bi_error = error; + bio->bi_status = error; bio_endio(bio); } } -static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error) +static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, + blk_status_t error) { struct bio_list bios; unsigned long flags; @@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc) __merge_bio_list(&bios, &tc->retry_on_resume_list); spin_unlock_irqrestore(&tc->lock, flags); - error_bio_list(&bios, DM_ENDIO_REQUEUE); + error_bio_list(&bios, BLK_STS_DM_REQUEUE); requeue_deferred_cells(tc); } -static void error_retry_list_with_code(struct pool *pool, int error) +static void error_retry_list_with_code(struct pool *pool, blk_status_t error) { struct thin_c *tc; @@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error) static void error_retry_list(struct pool *pool) { - int error = get_pool_io_error_code(pool); - - error_retry_list_with_code(pool, error); + error_retry_list_with_code(pool, get_pool_io_error_code(pool)); } /* @@ -774,7 +771,7 @@ struct dm_thin_new_mapping { */ atomic_t prepare_actions; - int err; + blk_status_t status; struct thin_c *tc; dm_block_t virt_begin, virt_end; dm_block_t data_block; @@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) { struct dm_thin_new_mapping *m = context; - m->err = read_err || write_err ? -EIO : 0; + m->status = read_err || write_err ? 
BLK_STS_IOERR : 0; complete_mapping_preparation(m); } @@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio) bio->bi_end_io = m->saved_bi_end_io; - m->err = bio->bi_error; + m->status = bio->bi_status; complete_mapping_preparation(m); } @@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) struct bio *bio = m->bio; int r; - if (m->err) { + if (m->status) { cell_error(pool, m->cell); goto out; } @@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio) spin_unlock_irqrestore(&tc->lock, flags); } -static int should_error_unserviceable_bio(struct pool *pool) +static blk_status_t should_error_unserviceable_bio(struct pool *pool) { enum pool_mode m = get_pool_mode(pool); @@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool) case PM_WRITE: /* Shouldn't get here */ DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode"); - return -EIO; + return BLK_STS_IOERR; case PM_OUT_OF_DATA_SPACE: - return pool->pf.error_if_no_space ? -ENOSPC : 0; + return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0; case PM_READ_ONLY: case PM_FAIL: - return -EIO; + return BLK_STS_IOERR; default: /* Shouldn't get here */ DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode"); - return -EIO; + return BLK_STS_IOERR; } } static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) { - int error = should_error_unserviceable_bio(pool); + blk_status_t error = should_error_unserviceable_bio(pool); if (error) { - bio->bi_error = error; + bio->bi_status = error; bio_endio(bio); } else retry_on_resume(bio); @@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c { struct bio *bio; struct bio_list bios; - int error; + blk_status_t error; error = should_error_unserviceable_bio(pool); if (error) { @@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc) unsigned count = 0; if (tc->requeue_mode) { - error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE); + error_thin_bio_list(tc, &tc->deferred_bio_list, + BLK_STS_DM_REQUEUE); return; } @@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws) if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { pool->pf.error_if_no_space = true; notify_of_pool_mode_change_to_oods(pool); - error_retry_list_with_code(pool, -ENOSPC); + error_retry_list_with_code(pool, BLK_STS_NOSPC); } } @@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) thin_hook_bio(tc, bio); if (tc->requeue_mode) { - bio->bi_error = DM_ENDIO_REQUEUE; + bio->bi_status = BLK_STS_DM_REQUEUE; bio_endio(bio); return DM_MAPIO_SUBMITTED; } @@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio) return thin_bio_map(ti, bio); } -static int thin_endio(struct dm_target *ti, struct bio *bio, int *err) +static int thin_endio(struct dm_target *ti, struct bio *bio, + blk_status_t *err) { unsigned long flags; struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 9ed55468b98b..2dca66eb67e1 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io) /* * End one "io" structure with a given error. 
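The dm-thin hunks above also change what a zero status means: should_error_unserviceable_bio() now returns 0 for "park and retry on resume" and a definite blk_status_t otherwise (BLK_STS_NOSPC when error_if_no_space is set, BLK_STS_IOERR for read-only or failed pools). The consumer side, mirroring handle_unserviceable_bio() with hypothetical example_* names:

#include <linux/bio.h>
#include <linux/blk_types.h>

static void example_retry_on_resume(struct bio *bio)
{
	/* hypothetical: park the bio on a per-device retry list */
}

static void example_handle_unserviceable(struct bio *bio, blk_status_t error)
{
	if (error) {			/* definite failure: complete now */
		bio->bi_status = error;
		bio_endio(bio);
	} else {			/* zero: defer rather than fail */
		example_retry_on_resume(bio);
	}
}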
*/ -static void verity_finish_io(struct dm_verity_io *io, int error) +static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) { struct dm_verity *v = io->v; struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); bio->bi_end_io = io->orig_bi_end_io; - bio->bi_error = error; + bio->bi_status = status; verity_fec_finish_io(io); @@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w) { struct dm_verity_io *io = container_of(w, struct dm_verity_io, work); - verity_finish_io(io, verity_verify_io(io)); + verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; - if (bio->bi_error && !verity_fec_is_enabled(io->v)) { - verity_finish_io(io, bio->bi_error); + if (bio->bi_status && !verity_fec_is_enabled(io->v)) { + verity_finish_io(io, bio->bi_status); return; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7a7047211c64..f38f9dd5cbdd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue; */ struct dm_io { struct mapped_device *md; - int error; + blk_status_t status; atomic_t io_count; struct bio *bio; unsigned long start_time; @@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md) * Decrements the number of outstanding ios that a bio has been * cloned into, completing the original io if necc. */ -static void dec_pending(struct dm_io *io, int error) +static void dec_pending(struct dm_io *io, blk_status_t error) { unsigned long flags; - int io_error; + blk_status_t io_error; struct bio *bio; struct mapped_device *md = io->md; /* Push-back supersedes any I/O errors */ if (unlikely(error)) { spin_lock_irqsave(&io->endio_lock, flags); - if (!(io->error > 0 && __noflush_suspending(md))) - io->error = error; + if (!(io->status == BLK_STS_DM_REQUEUE && + __noflush_suspending(md))) + io->status = error; spin_unlock_irqrestore(&io->endio_lock, flags); } if (atomic_dec_and_test(&io->io_count)) { - if (io->error == DM_ENDIO_REQUEUE) { + if (io->status == BLK_STS_DM_REQUEUE) { /* * Target requested pushing back the I/O. */ @@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error) bio_list_add_head(&md->deferred, io->bio); else /* noflush suspend was interrupted. 
*/ - io->error = -EIO; + io->status = BLK_STS_IOERR; spin_unlock_irqrestore(&md->deferred_lock, flags); } - io_error = io->error; + io_error = io->status; bio = io->bio; end_io_acct(io); free_io(md, io); - if (io_error == DM_ENDIO_REQUEUE) + if (io_error == BLK_STS_DM_REQUEUE) return; if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { @@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - bio->bi_error = io_error; + bio->bi_status = io_error; bio_endio(bio); } } @@ -838,14 +839,13 @@ void disable_write_zeroes(struct mapped_device *md) static void clone_endio(struct bio *bio) { - int error = bio->bi_error; - int r = error; + blk_status_t error = bio->bi_status; struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; - if (unlikely(error == -EREMOTEIO)) { + if (unlikely(error == BLK_STS_TARGET)) { if (bio_op(bio) == REQ_OP_WRITE_SAME && !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) disable_write_same(md); @@ -855,10 +855,10 @@ static void clone_endio(struct bio *bio) } if (endio) { - r = endio(tio->ti, bio, &error); + int r = endio(tio->ti, bio, &error); switch (r) { case DM_ENDIO_REQUEUE: - error = DM_ENDIO_REQUEUE; + error = BLK_STS_DM_REQUEUE; /*FALLTHRU*/ case DM_ENDIO_DONE: break; @@ -1094,11 +1094,11 @@ static void __map_bio(struct dm_target_io *tio) generic_make_request(clone); break; case DM_MAPIO_KILL: - r = -EIO; - /*FALLTHRU*/ + dec_pending(tio->io, BLK_STS_IOERR); + free_tio(tio); + break; case DM_MAPIO_REQUEUE: - /* error the io and bail out, or requeue it if needed */ - dec_pending(tio->io, r); + dec_pending(tio->io, BLK_STS_DM_REQUEUE); free_tio(tio); break; default: @@ -1366,7 +1366,7 @@ static void __split_and_process_bio(struct mapped_device *md, ci.map = map; ci.md = md; ci.io = alloc_io(md); - ci.io->error = 0; + ci.io->status = 0; atomic_set(&ci.io->io_count, 1); ci.io->bio = bio; ci.io->md = md; diff --git a/drivers/md/md.c b/drivers/md/md.c index 10367ffe92e3..6452e83fd650 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) } if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) - bio->bi_error = -EROFS; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); return BLK_QC_T_NONE; } @@ -719,8 +719,8 @@ static void super_written(struct bio *bio) struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; - if (bio->bi_error) { - pr_err("md: super_written gets error=%d\n", bio->bi_error); + if (bio->bi_status) { + pr_err("md: super_written gets error=%d\n", bio->bi_status); md_error(mddev, rdev); if (!test_bit(Faulty, &rdev->flags) && (bio->bi_opf & MD_FAILFAST)) { @@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, submit_bio_wait(bio); - ret = !bio->bi_error; + ret = !bio->bi_status; bio_put(bio); return ret; } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index e95d521d93e9..68d036e64041 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh) * operation and are ready to return a success/failure code to the buffer * cache layer. 
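clone_endio() above also fixes the calling convention for every dm target: the per-target hook now receives the status through a blk_status_t pointer and returns a DM_ENDIO_* control code, keeping "what happened" separate from "what to do next" (thin_endio() above adopts the same signature). A conforming hook might look like this (the masking policy is invented for illustration):

#include <linux/device-mapper.h>

static int my_target_end_io(struct dm_target *ti, struct bio *bio,
                            blk_status_t *error)
{
        if (*error == BLK_STS_TARGET)
                *error = BLK_STS_OK;    /* invented policy: swallow it */
        return DM_ENDIO_DONE;           /* or DM_ENDIO_REQUEUE to push back */
}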
*/ -static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) +static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status) { struct bio *bio = mp_bh->master_bio; struct mpconf *conf = mp_bh->mddev->private; - bio->bi_error = err; + bio->bi_status = status; bio_endio(bio); mempool_free(mp_bh, conf->pool); } @@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio) struct mpconf *conf = mp_bh->mddev->private; struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; - if (!bio->bi_error) + if (!bio->bi_status) multipath_end_bh_io(mp_bh, 0); else if (!(bio->bi_opf & REQ_RAHEAD)) { /* @@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio) (unsigned long long)bio->bi_iter.bi_sector); multipath_reschedule_retry(mp_bh); } else - multipath_end_bh_io(mp_bh, bio->bi_error); + multipath_end_bh_io(mp_bh, bio->bi_status); rdev_dec_pending(rdev, conf->mddev); } @@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread) pr_err("multipath: %s: unrecoverable IO read error for block %llu\n", bdevname(bio->bi_bdev,b), (unsigned long long)bio->bi_iter.bi_sector); - multipath_end_bh_io(mp_bh, -EIO); + multipath_end_bh_io(mp_bh, BLK_STS_IOERR); } else { pr_err("multipath: %s: redirecting sector %llu to another IO path\n", bdevname(bio->bi_bdev,b), diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index af5056d56878..94b87c4d0f7b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio) struct r1conf *conf = r1_bio->mddev->private; if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); /* @@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio) static void raid1_end_read_request(struct bio *bio) { - int uptodate = !bio->bi_error; + int uptodate = !bio->bi_status; struct r1bio *r1_bio = bio->bi_private; struct r1conf *conf = r1_bio->mddev->private; struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev; @@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio) struct md_rdev *rdev = conf->mirrors[mirror].rdev; bool discard_error; - discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; + discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; /* * 'one mirror IO has finished' event handler: */ - if (bio->bi_error && !discard_error) { + if (bio->bi_status && !discard_error) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, & @@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio) bio->bi_next = NULL; bio->bi_bdev = rdev->bdev; if (test_bit(Faulty, &rdev->flags)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) @@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio) * or re-read if the read failed. 
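flush_bio_list() in the raid1 hunks above shows the common md idiom for a member already marked Faulty: complete the bio on the spot with the new status constant instead of assigning an errno. As a standalone sketch (my_dispatch() is hypothetical):

#include <linux/bio.h>
#include <linux/blkdev.h>

static void my_dispatch(struct bio *bio, bool faulty)
{
        if (faulty) {
                bio->bi_status = BLK_STS_IOERR; /* was: bi_error = -EIO */
                bio_endio(bio);
                return;
        }
        generic_make_request(bio);
}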
* We don't do much here, just schedule handling by raid1d */ - if (!bio->bi_error) + if (!bio->bi_status) set_bit(R1BIO_Uptodate, &r1_bio->state); if (atomic_dec_and_test(&r1_bio->remaining)) @@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio) static void end_sync_write(struct bio *bio) { - int uptodate = !bio->bi_error; + int uptodate = !bio->bi_status; struct r1bio *r1_bio = get_resync_r1bio(bio); struct mddev *mddev = r1_bio->mddev; struct r1conf *conf = mddev->private; @@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) idx ++; } set_bit(R1BIO_Uptodate, &r1_bio->state); - bio->bi_error = 0; + bio->bi_status = 0; return 1; } @@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio) for (i = 0; i < conf->raid_disks * 2; i++) { int j; int size; - int error; + blk_status_t status; struct bio_vec *bi; struct bio *b = r1_bio->bios[i]; struct resync_pages *rp = get_resync_pages(b); if (b->bi_end_io != end_sync_read) continue; /* fixup the bio for reuse, but preserve errno */ - error = b->bi_error; + status = b->bi_status; bio_reset(b); - b->bi_error = error; + b->bi_status = status; b->bi_vcnt = vcnt; b->bi_iter.bi_size = r1_bio->sectors << 9; b->bi_iter.bi_sector = r1_bio->sector + @@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio) } for (primary = 0; primary < conf->raid_disks * 2; primary++) if (r1_bio->bios[primary]->bi_end_io == end_sync_read && - !r1_bio->bios[primary]->bi_error) { + !r1_bio->bios[primary]->bi_status) { r1_bio->bios[primary]->bi_end_io = NULL; rdev_dec_pending(conf->mirrors[primary].rdev, mddev); break; @@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio) int j; struct bio *pbio = r1_bio->bios[primary]; struct bio *sbio = r1_bio->bios[i]; - int error = sbio->bi_error; + blk_status_t status = sbio->bi_status; struct page **ppages = get_resync_pages(pbio)->pages; struct page **spages = get_resync_pages(sbio)->pages; struct bio_vec *bi; @@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio) if (sbio->bi_end_io != end_sync_read) continue; /* Now we can 'fixup' the error value */ - sbio->bi_error = 0; + sbio->bi_status = 0; bio_for_each_segment_all(bi, sbio, j) page_len[j] = bi->bv_len; - if (!error) { + if (!status) { for (j = vcnt; j-- ; ) { if (memcmp(page_address(ppages[j]), page_address(spages[j]), @@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio) if (j >= 0) atomic64_add(r1_bio->sectors, &mddev->resync_mismatches); if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) - && !error)) { + && !status)) { /* No need to write to this device. 
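bio_reset() clears bi_status along with the rest of the bio, which is why process_checks() above stashes the status in a local across the reset ("fixup the bio for reuse, but preserve errno"). The idiom in isolation:

#include <linux/bio.h>

static void reset_bio_keep_status(struct bio *b)
{
        blk_status_t status = b->bi_status;

        bio_reset(b);
        b->bi_status = status;
        /* caller re-fills bi_iter, bi_end_io, the page vector, ... */
}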
*/ sbio->bi_end_io = NULL; rdev_dec_pending(conf->mirrors[i].rdev, mddev); @@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio struct bio *bio = r1_bio->bios[m]; if (bio->bi_end_io == NULL) continue; - if (!bio->bi_error && + if (!bio->bi_status && test_bit(R1BIO_MadeGood, &r1_bio->state)) { rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); } - if (bio->bi_error && + if (bio->bi_status && test_bit(R1BIO_WriteError, &r1_bio->state)) { if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) md_error(conf->mddev, rdev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 4343d7ff9916..89ad1cd29037 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio) struct r10conf *conf = r10_bio->mddev->private; if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); /* @@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, static void raid10_end_read_request(struct bio *bio) { - int uptodate = !bio->bi_error; + int uptodate = !bio->bi_status; struct r10bio *r10_bio = bio->bi_private; int slot, dev; struct md_rdev *rdev; @@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio) struct bio *to_put = NULL; bool discard_error; - discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; + discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); @@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio) /* * this branch is our 'one mirror IO has finished' event handler: */ - if (bio->bi_error && !discard_error) { + if (bio->bi_status && !discard_error) { if (repl) /* Never record new bad blocks to replacement, * just fail it. 
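raid1_end_write_request() earlier and raid10_end_write_request() here gate their error bookkeeping on the same mask, so a failed discard never marks a member bad. Extracted as a sketch (the pr_warn() stands in for the real bad-block accounting):

#include <linux/bio.h>
#include <linux/printk.h>

static bool tolerated_discard_error(struct bio *bio)
{
        return bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
}

static void my_write_end(struct bio *bio)      /* hypothetical endio */
{
        if (bio->bi_status && !tolerated_discard_error(bio))
                pr_warn("write error on member device\n");
}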
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf) bio->bi_next = NULL; bio->bi_bdev = rdev->bdev; if (test_bit(Faulty, &rdev->flags)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) @@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) bio->bi_next = NULL; bio->bi_bdev = rdev->bdev; if (test_bit(Faulty, &rdev->flags)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) @@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d) { struct r10conf *conf = r10_bio->mddev->private; - if (!bio->bi_error) + if (!bio->bi_status) set_bit(R10BIO_Uptodate, &r10_bio->state); else /* The write handler will notice the lack of @@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio) else rdev = conf->mirrors[d].rdev; - if (bio->bi_error) { + if (bio->bi_status) { if (repl) md_error(mddev, rdev); else { @@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) /* find the first device with a block */ for (i=0; icopies; i++) - if (!r10_bio->devs[i].bio->bi_error) + if (!r10_bio->devs[i].bio->bi_status) break; if (i == conf->copies) @@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tpages = get_resync_pages(tbio)->pages; d = r10_bio->devs[i].devnum; rdev = conf->mirrors[d].rdev; - if (!r10_bio->devs[i].bio->bi_error) { + if (!r10_bio->devs[i].bio->bi_status) { /* We know that the bi_io_vec layout is the same for * both 'first' and 'i', so we just compare them. 
* All vec entries are PAGE_SIZE; @@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev = conf->mirrors[dev].rdev; if (r10_bio->devs[m].bio == NULL) continue; - if (!r10_bio->devs[m].bio->bi_error) { + if (!r10_bio->devs[m].bio->bi_status) { rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, @@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) if (r10_bio->devs[m].repl_bio == NULL) continue; - if (!r10_bio->devs[m].repl_bio->bi_error) { + if (!r10_bio->devs[m].repl_bio->bi_status) { rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, @@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) r10_bio->devs[m].addr, r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); - } else if (bio != NULL && bio->bi_error) { + } else if (bio != NULL && bio->bi_status) { fail = true; if (!narrow_write_error(r10_bio, m)) { md_error(conf->mddev, rdev); @@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->devs[i].repl_bio->bi_end_io = NULL; bio = r10_bio->devs[i].bio; - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; rcu_read_lock(); rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { @@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, /* Need to set up for writing to the replacement */ bio = r10_bio->devs[i].repl_bio; - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; sector = r10_bio->devs[i].addr; bio->bi_next = biolist; @@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (bio->bi_end_io == end_sync_read) { md_sync_acct(bio->bi_bdev, nr_sectors); - bio->bi_error = 0; + bio->bi_status = 0; generic_make_request(bio); } } @@ -4394,7 +4394,7 @@ read_more: read_bio->bi_end_io = end_reshape_read; bio_set_op_attrs(read_bio, REQ_OP_READ, 0); read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); - read_bio->bi_error = 0; + read_bio->bi_status = 0; read_bio->bi_vcnt = 0; read_bio->bi_iter.bi_size = 0; r10_bio->master_bio = read_bio; @@ -4638,7 +4638,7 @@ static void end_reshape_write(struct bio *bio) rdev = conf->mirrors[d].rdev; } - if (bio->bi_error) { + if (bio->bi_status) { /* FIXME should record badblock */ md_error(mddev, rdev); } diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 4c00bc248287..3ed6a0d89db8 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio) struct r5l_log *log = io->log; unsigned long flags; - if (bio->bi_error) + if (bio->bi_status) md_error(log->rdev->mddev, log->rdev); bio_put(bio); @@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio) unsigned long flags; struct r5l_io_unit *io; - if (bio->bi_error) + if (bio->bi_status) md_error(log->rdev->mddev, log->rdev); spin_lock_irqsave(&log->io_list_lock, flags); diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 5d25bebf3328..09e04be34e5f 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio) pr_debug("%s: seq: %llu\n", __func__, io->seq); - if (bio->bi_error) + if (bio->bi_status) md_error(ppl_conf->mddev, log->rdev); list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9c4f7659f8b1..e1bdc320f664 100644 --- 
a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi) pr_debug("end_read_request %llu/%d, count: %d, error %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), - bi->bi_error); + bi->bi_status); if (i == disks) { bio_reset(bi); BUG(); @@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi) s = sh->sector + rdev->new_data_offset; else s = sh->sector + rdev->data_offset; - if (!bi->bi_error) { + if (!bi->bi_status) { set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { /* Note that this cannot happen on a @@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi) } pr_debug("end_write_request %llu/%d, count %d, error: %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), - bi->bi_error); + bi->bi_status); if (i == disks) { bio_reset(bi); BUG(); @@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi) } if (replacement) { - if (bi->bi_error) + if (bi->bi_status) md_error(conf->mddev, rdev); else if (is_badblock(rdev, sh->sector, STRIPE_SECTORS, &first_bad, &bad_sectors)) set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); } else { - if (bi->bi_error) { + if (bi->bi_status) { set_bit(STRIPE_DEGRADED, &sh->state); set_bit(WriteErrorSeen, &rdev->flags); set_bit(R5_WriteError, &sh->dev[i].flags); @@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi) } rdev_dec_pending(rdev, conf->mddev); - if (sh->batch_head && bi->bi_error && !replacement) + if (sh->batch_head && bi->bi_status && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); bio_reset(bi); @@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_error = -EIO; + bi->bi_status = BLK_STS_IOERR; md_write_end(conf->mddev); bio_endio(bi); bi = nextbi; @@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_error = -EIO; + bi->bi_status = BLK_STS_IOERR; md_write_end(conf->mddev); bio_endio(bi); bi = bi2; @@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_error = -EIO; + bi->bi_status = BLK_STS_IOERR; bio_endio(bi); bi = nextbi; } @@ -5144,7 +5144,7 @@ static void raid5_align_endio(struct bio *bi) struct mddev *mddev; struct r5conf *conf; struct md_rdev *rdev; - int error = bi->bi_error; + blk_status_t error = bi->bi_status; bio_put(bi); @@ -5721,7 +5721,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) release_stripe_plug(mddev, sh); } else { /* cannot get stripe for read-ahead, just give-up */ - bi->bi_error = -EIO; + bi->bi_status = BLK_STS_IOERR; break; } } diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 822198a75e96..79eb9fb358d5 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) * another kernel subsystem, and we just pass it through. */ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; goto out; } @@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) "io error in %s sector %lld, len %d,\n", (rw == READ) ? 
"READ" : "WRITE", (unsigned long long) iter.bi_sector, len); - bio->bi_error = err; + bio->bi_status = errno_to_blk_status(err); break; } } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 983718b8fd9b..31b2d14e210d 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) * another kernel subsystem, and we just pass it through. */ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; goto out; } @@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) (op_is_write(bio_op(bio))) ? "WRITE" : "READ", (unsigned long long) iter.bi_sector, len); - bio->bi_error = err; + bio->bi_status = errno_to_blk_status(err); break; } } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index c544d466ea51..7bd383aeea14 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem) return to_nd_region(to_dev(pmem)->parent); } -static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, - unsigned int len) +static blk_status_t pmem_clear_poison(struct pmem_device *pmem, + phys_addr_t offset, unsigned int len) { struct device *dev = to_dev(pmem); sector_t sector; long cleared; - int rc = 0; + blk_status_t rc = BLK_STS_OK; sector = (offset - pmem->data_offset) / 512; cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); if (cleared < len) - rc = -EIO; + rc = BLK_STS_IOERR; if (cleared > 0 && cleared / 512) { cleared /= 512; dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, @@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page, kunmap_atomic(mem); } -static int read_pmem(struct page *page, unsigned int off, +static blk_status_t read_pmem(struct page *page, unsigned int off, void *pmem_addr, unsigned int len) { int rc; @@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off, rc = memcpy_mcsafe(mem + off, pmem_addr, len); kunmap_atomic(mem); if (rc) - return -EIO; - return 0; + return BLK_STS_IOERR; + return BLK_STS_OK; } -static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, +static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, bool is_write, sector_t sector) { - int rc = 0; + blk_status_t rc = BLK_STS_OK; bool bad_pmem = false; phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void *pmem_addr = pmem->virt_addr + pmem_off; @@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (!is_write) { if (unlikely(bad_pmem)) - rc = -EIO; + rc = BLK_STS_IOERR; else { rc = read_pmem(page, off, pmem_addr, len); flush_dcache_page(page); @@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { - int rc = 0; + blk_status_t rc = 0; bool do_acct; unsigned long start; struct bio_vec bvec; @@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) bvec.bv_offset, op_is_write(bio_op(bio)), iter.bi_sector); if (rc) { - bio->bi_error = rc; + bio->bi_status = rc; break; } } @@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { struct pmem_device *pmem = bdev->bd_queue->queuedata; - int rc; + blk_status_t rc; rc = 
pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector); @@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, if (rc == 0) page_endio(page, is_write, 0); - return rc; + return blk_status_to_errno(rc); } /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index c77940d80fc8..40128793e613 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio) struct nvmet_req *req = bio->bi_private; nvmet_req_complete(req, - bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); + bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); if (bio != &req->inline_bio) bio_put(bio); @@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req) bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; if (status) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } else { submit_bio(bio); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index bb069ebe4aa6..75373624604b 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio) struct se_cmd *cmd = bio->bi_private; struct iblock_req *ibr = cmd->priv; - if (bio->bi_error) { - pr_err("bio error: %p, err: %d\n", bio, bio->bi_error); + if (bio->bi_status) { + pr_err("bio error: %p, err: %d\n", bio, bio->bi_status); /* * Bump the ib_bio_err_cnt and release bio. */ @@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio) { struct se_cmd *cmd = bio->bi_private; - if (bio->bi_error) - pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error); + if (bio->bi_status) + pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status); if (cmd) { - if (bio->bi_error) + if (bio->bi_status) target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION); else target_complete_cmd(cmd, SAM_STAT_GOOD); diff --git a/fs/block_dev.c b/fs/block_dev.c index c1dc393ad6b9..bcd8e16a34e1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -262,8 +262,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, if (vecs != inline_vecs) kfree(vecs); - if (unlikely(bio.bi_error)) - return bio.bi_error; + if (unlikely(bio.bi_status)) + return blk_status_to_errno(bio.bi_status); return ret; } @@ -288,16 +288,18 @@ static void blkdev_bio_end_io(struct bio *bio) bool should_dirty = dio->should_dirty; if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { - if (bio->bi_error && !dio->bio.bi_error) - dio->bio.bi_error = bio->bi_error; + if (bio->bi_status && !dio->bio.bi_status) + dio->bio.bi_status = bio->bi_status; } else { if (!dio->is_sync) { struct kiocb *iocb = dio->iocb; - ssize_t ret = dio->bio.bi_error; + ssize_t ret; - if (likely(!ret)) { + if (likely(!dio->bio.bi_status)) { ret = dio->size; iocb->ki_pos += ret; + } else { + ret = blk_status_to_errno(dio->bio.bi_status); } dio->iocb->ki_complete(iocb, ret, 0); @@ -363,7 +365,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) ret = bio_iov_iter_get_pages(bio, iter); if (unlikely(ret)) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); break; } @@ -413,7 +415,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) __set_current_state(TASK_RUNNING); if (!ret) - ret = dio->bio.bi_error; + ret = blk_status_to_errno(dio->bio.bi_status); if (likely(!ret)) ret = dio->size; diff --git 
a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b8622e4d1744..d87ac27a5f2b 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -310,7 +310,8 @@ struct btrfs_dio_private { * The original bio may be split to several sub-bios, this is * done during endio of sub-bios */ - int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int); + blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *, + blk_status_t); }; /* diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ab14c2e635ca..4ded1c3f92b8 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp) /* mutex is not held! This is not save if IO is not yet completed * on umount */ iodone_w_error = 0; - if (bp->bi_error) + if (bp->bi_status) iodone_w_error = 1; BUG_ON(NULL == block); @@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp) if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", - bp->bi_error, + bp->bi_status, btrfsic_get_block_type(dev_state->state, block), block->logical_bytenr, dev_state->name, block->dev_bytenr, block->mirror_num); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 10e6b282d09d..9ac55b266e78 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio) unsigned long index; int ret; - if (bio->bi_error) + if (bio->bi_status) cb->errors = 1; /* if there are more bios still pending for this compressed @@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio) struct page *page; unsigned long index; - if (bio->bi_error) + if (bio->bi_status) cb->errors = 1; /* if there are more bios still pending for this compressed @@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio) cb->start, cb->start + cb->len - 1, NULL, - bio->bi_error ? 0 : 1); + bio->bi_status ? 0 : 1); cb->compressed_pages[0]->mapping = NULL; end_compressed_writeback(inode, cb); @@ -320,7 +320,7 @@ out: * This also checksums the file bytes and gets things ready for * the end io hooks. 
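btrfs_submit_compressed_write() below sets the template for the btrfs side of the conversion: the return type becomes blk_status_t, allocation failures turn into BLK_STS_RESOURCE rather than -ENOMEM, and the int-valued merge_bio_hook() decision moves into its own 'submit' local so the control and status domains never share a variable. That shape, compressed into a sketch (the my_* stand-ins are hypothetical):

#include <linux/blk_types.h>
#include <linux/slab.h>

static int my_merge_hook(void)       { return 0; }          /* control: int */
static blk_status_t my_map_bio(void) { return BLK_STS_OK; } /* status */

static blk_status_t my_submit_compressed(unsigned int nr_pages)
{
        blk_status_t ret = BLK_STS_OK;
        void *cb = kzalloc(64, GFP_NOFS);

        if (!cb)
                return BLK_STS_RESOURCE;        /* was: return -ENOMEM */

        while (nr_pages--) {
                int submit = my_merge_hook();   /* 1 means: submit now */

                if (submit)
                        ret = my_map_bio();
                if (ret)
                        break;                  /* propagate status as-is */
        }
        kfree(cb);
        return ret;
}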
*/ -int btrfs_submit_compressed_write(struct inode *inode, u64 start, +blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, @@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, struct page *page; u64 first_byte = disk_start; struct block_device *bdev; - int ret; + blk_status_t ret; int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; WARN_ON(start & ((u64)PAGE_SIZE - 1)); cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); if (!cb) - return -ENOMEM; + return BLK_STS_RESOURCE; refcount_set(&cb->pending_bios, 0); cb->errors = 0; cb->inode = inode; @@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); if (!bio) { kfree(cb); - return -ENOMEM; + return BLK_STS_RESOURCE; } bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = cb; @@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, /* create and submit bios for the compressed pages */ bytes_left = compressed_len; for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { + int submit = 0; + page = compressed_pages[pg_index]; page->mapping = inode->i_mapping; if (bio->bi_iter.bi_size) - ret = io_tree->ops->merge_bio_hook(page, 0, + submit = io_tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, bio, 0); - else - ret = 0; page->mapping = NULL; - if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) < + if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_get(bio); @@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_map_bio(fs_info, bio, 0, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } @@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_map_bio(fs_info, bio, 0, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } @@ -569,7 +569,7 @@ next: * After the compressed pages are read, we copy the bytes into the * bio we were passed and then call the bio end_io calls */ -int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, +blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, u64 em_len; u64 em_start; struct extent_map *em; - int ret = -ENOMEM; + blk_status_t ret = BLK_STS_RESOURCE; int faili = 0; u32 *sums; @@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, PAGE_SIZE); read_unlock(&em_tree->lock); if (!em) - return -EIO; + return BLK_STS_IOERR; compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); @@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, refcount_set(&cb->pending_bios, 1); for (pg_index = 0; pg_index < nr_pages; pg_index++) { + int submit = 0; + page = cb->compressed_pages[pg_index]; page->mapping = inode->i_mapping; page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) - ret = tree->ops->merge_bio_hook(page, 0, + submit = tree->ops->merge_bio_hook(page, 0, PAGE_SIZE, comp_bio, 0); - else - ret = 0; page->mapping = NULL; - if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < + if (submit || bio_add_page(comp_bio, 
page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_get(comp_bio); @@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); if (ret) { - comp_bio->bi_error = ret; + comp_bio->bi_status = ret; bio_endio(comp_bio); } @@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); if (ret) { - comp_bio->bi_error = ret; + comp_bio->bi_status = ret; bio_endio(comp_bio); } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 39ec43ab8df1..680d4265d601 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long total_out, u64 disk_start, struct bio *bio); -int btrfs_submit_compressed_write(struct inode *inode, u64 start, +blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, struct page **compressed_pages, unsigned long nr_pages); -int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, +blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); enum btrfs_compression_type { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 643c70d2b2e6..d2da0a52d560 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); -int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); -int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); +blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums); -int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, +blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8685d67185d0..46accc75ad5a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -87,7 +87,7 @@ struct btrfs_end_io_wq { bio_end_io_t *end_io; void *private; struct btrfs_fs_info *info; - int error; + blk_status_t status; enum btrfs_wq_endio_type metadata; struct list_head list; struct btrfs_work work; @@ -131,7 +131,7 @@ struct async_submit_bio { */ u64 bio_offset; struct btrfs_work work; - int error; + blk_status_t status; }; /* @@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio) btrfs_work_func_t func; fs_info = end_io_wq->info; - end_io_wq->error = bio->bi_error; + end_io_wq->status = bio->bi_status; if (bio_op(bio) == REQ_OP_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { @@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio) btrfs_queue_work(wq, &end_io_wq->work); } -int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, +blk_status_t 
btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata) { struct btrfs_end_io_wq *end_io_wq; end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); if (!end_io_wq) - return -ENOMEM; + return BLK_STS_RESOURCE; end_io_wq->private = bio->bi_private; end_io_wq->end_io = bio->bi_end_io; end_io_wq->info = info; - end_io_wq->error = 0; + end_io_wq->status = 0; end_io_wq->bio = bio; end_io_wq->metadata = metadata; @@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) static void run_one_async_start(struct btrfs_work *work) { struct async_submit_bio *async; - int ret; + blk_status_t ret; async = container_of(work, struct async_submit_bio, work); ret = async->submit_bio_start(async->inode, async->bio, async->mirror_num, async->bio_flags, async->bio_offset); if (ret) - async->error = ret; + async->status = ret; } static void run_one_async_done(struct btrfs_work *work) @@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work) wake_up(&fs_info->async_submit_wait); /* If an error occurred we just want to clean up the bio and move on */ - if (async->error) { - async->bio->bi_error = async->error; + if (async->status) { + async->bio->bi_status = async->status; bio_endio(async->bio); return; } @@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work) kfree(async); } -int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - struct bio *bio, int mirror_num, - unsigned long bio_flags, - u64 bio_offset, - extent_submit_bio_hook_t *submit_bio_start, - extent_submit_bio_hook_t *submit_bio_done) +blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, + struct inode *inode, struct bio *bio, int mirror_num, + unsigned long bio_flags, u64 bio_offset, + extent_submit_bio_hook_t *submit_bio_start, + extent_submit_bio_hook_t *submit_bio_done) { struct async_submit_bio *async; async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) - return -ENOMEM; + return BLK_STS_RESOURCE; async->inode = inode; async->bio = bio; @@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->bio_flags = bio_flags; async->bio_offset = bio_offset; - async->error = 0; + async->status = 0; atomic_inc(&fs_info->nr_async_submits); @@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, return 0; } -static int btree_csum_one_bio(struct bio *bio) +static blk_status_t btree_csum_one_bio(struct bio *bio) { struct bio_vec *bvec; struct btrfs_root *root; @@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio) break; } - return ret; + return errno_to_blk_status(ret); } -static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btree_submit_bio_start(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { /* * when we're called for a write, we're already in the async @@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, return btree_csum_one_bio(bio); } -static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btree_submit_bio_done(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { - int ret; + blk_status_t ret; /* * when we're called for a write, we're already in the async 
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, */ ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } return ret; @@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags) return 1; } -static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, +static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int async = check_async_write(bio_flags); - int ret; + blk_status_t ret; if (bio_op(bio) != REQ_OP_WRITE) { /* @@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, return 0; out_w_error: - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); return ret; } @@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work) end_io_wq = container_of(work, struct btrfs_end_io_wq, work); bio = end_io_wq->bio; - bio->bi_error = end_io_wq->error; + bio->bi_status = end_io_wq->status; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); @@ -3495,11 +3494,11 @@ static void btrfs_end_empty_barrier(struct bio *bio) * any device where the flush fails with eopnotsupp are flagged as not-barrier * capable */ -static int write_dev_flush(struct btrfs_device *device, int wait) +static blk_status_t write_dev_flush(struct btrfs_device *device, int wait) { struct request_queue *q = bdev_get_queue(device->bdev); struct bio *bio; - int ret = 0; + blk_status_t ret = 0; if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) return 0; @@ -3511,8 +3510,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); - if (bio->bi_error) { - ret = bio->bi_error; + if (bio->bi_status) { + ret = bio->bi_status; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); } @@ -3531,7 +3530,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) device->flush_bio = NULL; bio = btrfs_io_bio_alloc(GFP_NOFS, 0); if (!bio) - return -ENOMEM; + return BLK_STS_RESOURCE; bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; @@ -3556,7 +3555,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) struct btrfs_device *dev; int errors_send = 0; int errors_wait = 0; - int ret; + blk_status_t ret; /* send down all the barriers */ head = &info->fs_devices->devices; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 21f1ceb85b76..c581927555f3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(const char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, u8 *result); -int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, +blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); -int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, - struct bio *bio, int mirror_num, - unsigned long bio_flags, u64 bio_offset, - extent_submit_bio_hook_t *submit_bio_start, - extent_submit_bio_hook_t *submit_bio_done); +blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, + struct inode *inode, struct bio *bio, int mirror_num, + unsigned long 
bio_flags, u64 bio_offset, + extent_submit_bio_hook_t *submit_bio_start, + extent_submit_bio_hook_t *submit_bio_done); unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); int btrfs_write_tree_block(struct extent_buffer *buf); int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d8da3edf2ac3..35cbb6ceb70d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2399,6 +2399,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct bio *bio; int read_mode = 0; + blk_status_t status; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -2431,11 +2432,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", read_mode, failrec->this_mirror, failrec->in_validation); - ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, + status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); - if (ret) { + if (status) { free_io_failure(BTRFS_I(inode), failrec); bio_put(bio); + ret = blk_status_to_errno(status); } return ret; @@ -2474,6 +2476,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) */ static void end_bio_extent_writepage(struct bio *bio) { + int error = blk_status_to_errno(bio->bi_status); struct bio_vec *bvec; u64 start; u64 end; @@ -2503,7 +2506,7 @@ static void end_bio_extent_writepage(struct bio *bio) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; - end_extent_writepage(page, bio->bi_error, start, end); + end_extent_writepage(page, error, start, end); end_page_writeback(page); } @@ -2536,7 +2539,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, static void end_bio_extent_readpage(struct bio *bio) { struct bio_vec *bvec; - int uptodate = !bio->bi_error; + int uptodate = !bio->bi_status; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; u64 offset = 0; @@ -2556,7 +2559,7 @@ static void end_bio_extent_readpage(struct bio *bio) btrfs_debug(fs_info, "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", - (u64)bio->bi_iter.bi_sector, bio->bi_error, + (u64)bio->bi_iter.bi_sector, bio->bi_status, io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; @@ -2615,7 +2618,7 @@ static void end_bio_extent_readpage(struct bio *bio) ret = bio_readpage_error(bio, offset, page, start, end, mirror); if (ret == 0) { - uptodate = !bio->bi_error; + uptodate = !bio->bi_status; offset += len; continue; } @@ -2673,7 +2676,7 @@ readpage_ok: endio_readpage_release_extent(tree, extent_start, extent_len, uptodate); if (io_bio->end_io) - io_bio->end_io(io_bio, bio->bi_error); + io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status)); bio_put(bio); } @@ -2743,7 +2746,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) static int __must_check submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_flags) { - int ret = 0; + blk_status_t ret = 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct page *page = bvec->bv_page; struct extent_io_tree *tree = bio->bi_private; @@ -2761,7 +2764,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, btrfsic_submit_bio(bio); bio_put(bio); - return ret; + return blk_status_to_errno(ret); } static int merge_bio(struct extent_io_tree *tree, struct page *page, @@ 
-3707,7 +3710,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio) BUG_ON(!eb); done = atomic_dec_and_test(&eb->io_pages); - if (bio->bi_error || + if (bio->bi_status || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { ClearPageUptodate(page); set_btree_ioerr(page); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1eafa2f0ede3..487ca0207cb6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -92,9 +92,9 @@ struct btrfs_inode; struct btrfs_io_bio; struct io_failure_record; -typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset); +typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset); struct extent_io_ops { /* * The following callbacks must be allways defined, the function diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 64fcb31d7163..5b1c7090e546 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err) kfree(bio->csum_allocated); } -static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, +static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u64 logical_offset, u32 *dst, int dio) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, path = btrfs_alloc_path(); if (!path) - return -ENOMEM; + return BLK_STS_RESOURCE; nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; if (!dst) { @@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, csum_size, GFP_NOFS); if (!btrfs_bio->csum_allocated) { btrfs_free_path(path); - return -ENOMEM; + return BLK_STS_RESOURCE; } btrfs_bio->csum = btrfs_bio->csum_allocated; btrfs_bio->end_io = btrfs_io_bio_endio_readpage; @@ -303,12 +303,12 @@ next: return 0; } -int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) { return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); } -int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) +blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) { return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1); } @@ -433,7 +433,7 @@ fail: return ret; } -int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, +blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), GFP_NOFS); if (!sums) - return -ENOMEM; + return BLK_STS_RESOURCE; sums->len = bio->bi_iter.bi_size; INIT_LIST_HEAD(&sums->list); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 758b2666885e..ea7cae1003eb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -842,13 +842,12 @@ retry: NULL, EXTENT_LOCKED | EXTENT_DELALLOC, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK); - ret = btrfs_submit_compressed_write(inode, + if (btrfs_submit_compressed_write(inode, async_extent->start, async_extent->ram_size, ins.objectid, ins.offset, async_extent->pages, - async_extent->nr_pages); - if (ret) { + async_extent->nr_pages)) { struct extent_io_tree 
*tree = &BTRFS_I(inode)->io_tree; struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; @@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btrfs_submit_bio_start(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { - int ret = 0; + blk_status_t ret = 0; ret = btrfs_csum_one_bio(inode, bio, 0, 0); BUG_ON(ret); /* -ENOMEM */ @@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, * At IO completion time the cums attached on the ordered extent record * are inserted into the btree */ -static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset) +static blk_status_t __btrfs_submit_bio_done(struct inode *inode, + struct bio *bio, int mirror_num, unsigned long bio_flags, + u64 bio_offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int ret; + blk_status_t ret; ret = btrfs_map_bio(fs_info, bio, mirror_num, 1); if (ret) { - bio->bi_error = ret; + bio->bi_status = ret; bio_endio(bio); } return ret; @@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, * extent_io.c submission hook. This does the right thing for csum calculation * on write, or reading the csums from the tree before a read */ -static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, +static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; - int ret = 0; + blk_status_t ret = 0; int skip_sum; int async = !atomic_read(&BTRFS_I(inode)->sync_writers); @@ -1991,8 +1990,8 @@ mapit: ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); out: - if (ret < 0) { - bio->bi_error = ret; + if (ret) { + bio->bi_status = ret; bio_endio(bio); } return ret; @@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) struct bio_vec *bvec; int i; - if (bio->bi_error) + if (bio->bi_status) goto end; ASSERT(bio->bi_vcnt == 1); @@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio) int ret; int i; - if (bio->bi_error) + if (bio->bi_status) goto end; uptodate = 1; @@ -8141,8 +8140,8 @@ end: bio_put(bio); } -static int __btrfs_subio_endio_read(struct inode *inode, - struct btrfs_io_bio *io_bio, int err) +static blk_status_t __btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, blk_status_t err) { struct btrfs_fs_info *fs_info; struct bio_vec *bvec; @@ -8184,7 +8183,7 @@ try_again: io_bio->mirror_num, btrfs_retry_endio, &done); if (ret) { - err = ret; + err = errno_to_blk_status(ret); goto next; } @@ -8211,8 +8210,8 @@ next: return err; } -static int btrfs_subio_endio_read(struct inode *inode, - struct btrfs_io_bio *io_bio, int err) +static blk_status_t btrfs_subio_endio_read(struct inode *inode, + struct btrfs_io_bio *io_bio, blk_status_t err) { bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; @@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio) struct inode *inode = dip->inode; struct 
bio *dio_bio; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); - int err = bio->bi_error; + blk_status_t err = bio->bi_status; if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) err = btrfs_subio_endio_read(inode, io_bio, err); @@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio) kfree(dip); - dio_bio->bi_error = bio->bi_error; + dio_bio->bi_status = bio->bi_status; dio_end_io(dio_bio); if (io_bio->end_io) - io_bio->end_io(io_bio, err); + io_bio->end_io(io_bio, blk_status_to_errno(err)); bio_put(bio); } @@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio) struct bio *dio_bio = dip->dio_bio; __endio_write_update_ordered(dip->inode, dip->logical_offset, - dip->bytes, !bio->bi_error); + dip->bytes, !bio->bi_status); kfree(dip); - dio_bio->bi_error = bio->bi_error; + dio_bio->bi_status = bio->bi_status; dio_end_io(dio_bio); bio_put(bio); } -static int __btrfs_submit_bio_start_direct_io(struct inode *inode, +static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 offset) { - int ret; + blk_status_t ret; ret = btrfs_csum_one_bio(inode, bio, offset, 1); BUG_ON(ret); /* -ENOMEM */ return 0; @@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, static void btrfs_end_dio_bio(struct bio *bio) { struct btrfs_dio_private *dip = bio->bi_private; - int err = bio->bi_error; + blk_status_t err = bio->bi_status; if (err) btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, @@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio) if (dip->errors) { bio_io_error(dip->orig_bio); } else { - dip->dio_bio->bi_error = 0; + dip->dio_bio->bi_status = 0; bio_endio(dip->orig_bio); } out: @@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, return bio; } -static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode, +static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, struct btrfs_dio_private *dip, struct bio *bio, u64 file_offset) { struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); - int ret; + blk_status_t ret; /* * We load all the csum data we need when we submit @@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_dio_private *dip = bio->bi_private; bool write = bio_op(bio) == REQ_OP_WRITE; - int ret; + blk_status_t ret; if (async_submit) async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); @@ -8649,7 +8648,7 @@ free_ordered: * callbacks - they require an allocated dip and a clone of dio_bio. */ if (io_bio && dip) { - io_bio->bi_error = -EIO; + io_bio->bi_status = BLK_STS_IOERR; bio_endio(io_bio); /* * The end io callbacks free our dip, do the final put on io_bio @@ -8668,7 +8667,7 @@ free_ordered: unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, file_offset + dio_bio->bi_iter.bi_size - 1); - dio_bio->bi_error = -EIO; + dio_bio->bi_status = BLK_STS_IOERR; /* * Releases and cleans up our dio_bio, no need to bio_put() * nor bio_endio()/bio_io_error() against dio_bio. 
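The direct-I/O completions above exercise both directions of the boundary: status moves between bios by plain assignment (dio_bio->bi_status = bio->bi_status), while btrfs_io_bio's end_io callback still takes an errno and therefore gets a blk_status_to_errno() right at the call site. In isolation:

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Sketch: same-domain propagation is a copy; a legacy int callback
 * gets one explicit conversion at the call (as io_bio->end_io does). */
static void my_dio_done(struct bio *clone, struct bio *orig,
                        void (*legacy_done)(struct bio *, int))
{
        orig->bi_status = clone->bi_status;
        if (legacy_done)
                legacy_done(orig, blk_status_to_errno(clone->bi_status));
        bio_put(clone);
}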
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index d8ea0eb76325..f3d30d9ea8f9 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio) * this frees the rbio and runs through all the bios in the * bio_list and calls end_io on them */ -static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) +static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) { struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *next; @@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) while (cur) { next = cur->bi_next; cur->bi_next = NULL; - cur->bi_error = err; + cur->bi_status = err; bio_endio(cur); cur = next; } @@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) static void raid_write_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - int err = bio->bi_error; + blk_status_t err = bio->bi_status; int max_errors; if (err) @@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio) max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? 0 : rbio->bbio->max_errors; if (atomic_read(&rbio->error) > max_errors) - err = -EIO; + err = BLK_STS_IOERR; rbio_orig_end_io(rbio, err); } @@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, * devices or if they are not contiguous */ if (last_end == disk_start && stripe->dev->bdev && - !last->bi_error && + !last->bi_status && last->bi_bdev == stripe->dev->bdev) { ret = bio_add_page(last, page, PAGE_SIZE, 0); if (ret == PAGE_SIZE) @@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - if (bio->bi_error) + if (bio->bi_status) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); @@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio) * we only read stripe pages off the disk, set them * up to date if there were no errors */ - if (bio->bi_error) + if (bio->bi_status) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); @@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - if (bio->bi_error) + if (bio->bi_status) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c7b45eb2403d..ba5595d19de1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -95,7 +95,7 @@ struct scrub_bio { struct scrub_ctx *sctx; struct btrfs_device *dev; struct bio *bio; - int err; + blk_status_t status; u64 logical; u64 physical; #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO @@ -1668,14 +1668,14 @@ leave_nomem: struct scrub_bio_ret { struct completion event; - int error; + blk_status_t status; }; static void scrub_bio_wait_endio(struct bio *bio) { struct scrub_bio_ret *ret = bio->bi_private; - ret->error = bio->bi_error; + ret->status = bio->bi_status; complete(&ret->event); } @@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, int ret; init_completion(&done.event); - done.error = 0; + done.status = 0; bio->bi_iter.bi_sector = page->logical >> 9; bio->bi_private = &done; bio->bi_end_io = scrub_bio_wait_endio; @@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, return ret; wait_for_completion(&done.event); - if (done.error) + if (done.status) return -EIO; return 0; @@ -1937,7 +1937,7 @@ again: bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = 
sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - sbio->err = 0; + sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical_for_dev_replace || sbio->logical + sbio->page_count * PAGE_SIZE != @@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio) struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->fs_info; - sbio->err = bio->bi_error; + sbio->status = bio->bi_status; sbio->bio = bio; btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, @@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) int i; WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); - if (sbio->err) { + if (sbio->status) { struct btrfs_dev_replace *dev_replace = &sbio->sctx->fs_info->dev_replace; @@ -2341,7 +2341,7 @@ again: bio->bi_bdev = sbio->dev->bdev; bio->bi_iter.bi_sector = sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_READ, 0); - sbio->err = 0; + sbio->status = 0; } else if (sbio->physical + sbio->page_count * PAGE_SIZE != spage->physical || sbio->logical + sbio->page_count * PAGE_SIZE != @@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio) struct scrub_block *sblock = bio->bi_private; struct btrfs_fs_info *fs_info = sblock->sctx->fs_info; - if (bio->bi_error) + if (bio->bi_status) sblock->no_io_error_seen = 0; bio_put(bio); @@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio) struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->fs_info; - sbio->err = bio->bi_error; + sbio->status = bio->bi_status; sbio->bio = bio; btrfs_queue_work(fs_info->scrub_workers, &sbio->work); @@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) int i; BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); - if (sbio->err) { + if (sbio->status) { for (i = 0; i < sbio->page_count; i++) { struct scrub_page *spage = sbio->pagev[i]; @@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio) struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; struct btrfs_fs_info *fs_info = sparity->sctx->fs_info; - if (bio->bi_error) + if (bio->bi_status) bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 017b67daa3bb..84a495967e0a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio) struct btrfs_bio *bbio = bio->bi_private; int is_orig_bio = 0; - if (bio->bi_error) { + if (bio->bi_status) { atomic_inc(&bbio->error); - if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) { + if (bio->bi_status == BLK_STS_IOERR || + bio->bi_status == BLK_STS_TARGET) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; struct btrfs_device *dev; @@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio) * beyond the tolerance of the btrfs bio */ if (atomic_read(&bbio->error) > bbio->max_errors) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; } else { /* * this bio is actually up to date, we didn't * go over the max number of errors */ - bio->bi_error = 0; + bio->bi_status = 0; } btrfs_end_bbio(bbio, bio); @@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_iter.bi_sector = logical >> 9; - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; btrfs_end_bbio(bbio, bio); } } diff --git a/fs/buffer.c b/fs/buffer.c index 
161be58c5cb0..306b720f7383 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3038,7 +3038,7 @@ static void end_bio_bh_io_sync(struct bio *bio) if (unlikely(bio_flagged(bio, BIO_QUIET))) set_bit(BH_Quiet, &bh->b_state); - bh->b_end_io(bh, !bio->bi_error); + bh->b_end_io(bh, !bio->bi_status); bio_put(bio); } diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index a409a84f1bca..6181e9526860 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, goto errout; } err = submit_bio_wait(bio); - if ((err == 0) && bio->bi_error) + if (err == 0 && bio->bi_status) err = -EIO; bio_put(bio); if (err) diff --git a/fs/direct-io.c b/fs/direct-io.c index bb711e4b86c2..e8baaabebf13 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work) dio_complete(dio, 0, true); } -static int dio_bio_complete(struct dio *dio, struct bio *bio); +static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); /* * Asynchronous IO callback. @@ -473,11 +473,11 @@ static struct bio *dio_await_one(struct dio *dio) /* * Process one completed BIO. No locks are held. */ -static int dio_bio_complete(struct dio *dio, struct bio *bio) +static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) { struct bio_vec *bvec; unsigned i; - int err = bio->bi_error; + blk_status_t err = bio->bi_status; if (err) dio->io_error = -EIO; @@ -536,7 +536,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) bio = dio->bio_list; dio->bio_list = bio->bi_private; spin_unlock_irqrestore(&dio->bio_lock, flags); - ret2 = dio_bio_complete(dio, bio); + ret2 = blk_status_to_errno(dio_bio_complete(dio, bio)); if (ret == 0) ret = ret2; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 1a82138ba739..930ca0fc9a0f 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio) } #endif - if (bio->bi_error) { + if (bio->bi_status) { SetPageError(page); mapping_set_error(page->mapping, -EIO); } @@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio) continue; } clear_buffer_async_write(bh); - if (bio->bi_error) + if (bio->bi_status) buffer_io_error(bh); } while ((bh = bh->b_this_page) != head); bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); @@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio) bdevname(bio->bi_bdev, b), (long long) bio->bi_iter.bi_sector, (unsigned) bio_sectors(bio), - bio->bi_error)) { + bio->bi_status)) { ext4_finish_bio(bio); bio_put(bio); return; } bio->bi_end_io = NULL; - if (bio->bi_error) { + if (bio->bi_status) { struct inode *inode = io_end->inode; ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " "(offset %llu size %ld starting block %llu)", - bio->bi_error, inode->i_ino, + bio->bi_status, inode->i_ino, (unsigned long long) io_end->offset, (long) io_end->size, (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); - mapping_set_error(inode->i_mapping, bio->bi_error); + mapping_set_error(inode->i_mapping, + blk_status_to_errno(bio->bi_status)); } if (io_end->flag & EXT4_IO_END_UNWRITTEN) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index a81b829d56de..40a5497b0f60 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio) int i; if (ext4_bio_encrypted(bio)) { - if (bio->bi_error) { + if (bio->bi_status) { fscrypt_release_ctx(bio->bi_private); } else { 
fscrypt_decrypt_bio_pages(bio->bi_private, bio); @@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - if (!bio->bi_error) { + if (!bio->bi_status) { SetPageUptodate(page); } else { ClearPageUptodate(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7c0f6bdf817d..36fe82012a33 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio) #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { f2fs_show_injection_info(FAULT_IO); - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; } #endif if (f2fs_bio_encrypted(bio)) { - if (bio->bi_error) { + if (bio->bi_status) { fscrypt_release_ctx(bio->bi_private); } else { fscrypt_decrypt_bio_pages(bio->bi_private, bio); @@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; - if (!bio->bi_error) { + if (!bio->bi_status) { if (!PageUptodate(page)) SetPageUptodate(page); } else { @@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio) unlock_page(page); mempool_free(page, sbi->write_io_dummy); - if (unlikely(bio->bi_error)) + if (unlikely(bio->bi_status)) f2fs_stop_checkpoint(sbi, true); continue; } fscrypt_pullback_bio_page(&page, true); - if (unlikely(bio->bi_error)) { + if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); f2fs_stop_checkpoint(sbi, true); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 96845854e7ee..ea9f455d94ba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - dc->error = bio->bi_error; + dc->error = blk_status_to_errno(bio->bi_status); dc->state = D_DONE; complete(&dc->wait); bio_put(bio); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 13ebf15a4db0..885d36e7a29f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) */ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, - int error) + blk_status_t error) { struct buffer_head *bh, *next; struct page *page = bvec->bv_page; @@ -209,13 +209,13 @@ static void gfs2_end_log_write(struct bio *bio) struct page *page; int i; - if (bio->bi_error) - fs_err(sdp, "Error %d writing to log\n", bio->bi_error); + if (bio->bi_status) + fs_err(sdp, "Error %d writing to log\n", bio->bi_status); bio_for_each_segment_all(bvec, bio, i) { page = bvec->bv_page; if (page_has_buffers(page)) - gfs2_end_log_write_bh(sdp, bvec, bio->bi_error); + gfs2_end_log_write_bh(sdp, bvec, bio->bi_status); else mempool_free(page, gfs2_page_pool); } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 663ffc135ef3..fabe1614f879 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio) do { struct buffer_head *next = bh->b_this_page; len -= bh->b_size; - bh->b_end_io(bh, !bio->bi_error); + bh->b_end_io(bh, !bio->bi_status); bh = next; } while (bh && len); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ed67548b286c..83953cdbbc6c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio) { struct page *page = bio->bi_private; - if (!bio->bi_error) + if (!bio->bi_status) SetPageUptodate(page); else - pr_warn("error %d reading 
superblock\n", bio->bi_error); + pr_warn("error %d reading superblock\n", bio->bi_status); unlock_page(page); } diff --git a/fs/iomap.c b/fs/iomap.c index 4b10892967a5..18f2f2b8ba2c 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio) struct iomap_dio *dio = bio->bi_private; bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); - if (bio->bi_error) - iomap_dio_set_error(dio, bio->bi_error); + if (bio->bi_status) + iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); if (atomic_dec_and_test(&dio->ref)) { if (is_sync_kiocb(dio->iocb)) { diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index bb1da1feafeb..a21f0e9eecd4 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio) bp->l_flag |= lbmDONE; - if (bio->bi_error) { + if (bio->bi_status) { bp->l_flag |= lbmERROR; jfs_err("lbmIODone: I/O error in JFS log"); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 489aaa1403e5..ce93db3aef3c 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio) { struct page *page = bio->bi_private; - if (bio->bi_error) { + if (bio->bi_status) { printk(KERN_ERR "metapage_read_end_io: I/O error\n"); SetPageError(page); } @@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio) BUG_ON(!PagePrivate(page)); - if (bio->bi_error) { + if (bio->bi_status) { printk(KERN_ERR "metapage_write_end_io: I/O error\n"); SetPageError(page); } diff --git a/fs/mpage.c b/fs/mpage.c index baff8f820c29..9524fdde00c2 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, op_is_write(bio_op(bio)), bio->bi_error); + page_endio(page, op_is_write(bio_op(bio)), + blk_status_to_errno(bio->bi_status)); } bio_put(bio); diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 0ca370d23ddb..d8863a804b15 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio) { struct parallel_io *par = bio->bi_private; - if (bio->bi_error) { + if (bio->bi_status) { struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) @@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio) struct parallel_io *par = bio->bi_private; struct nfs_pgio_header *header = par->data; - if (bio->bi_error) { + if (bio->bi_status) { if (!header->pnfs_error) header->pnfs_error = -EIO; pnfs_set_lo_fail(header->lseg); diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 6f87b2ac1aeb..e73c86d9855c 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio) { struct nilfs_segment_buffer *segbuf = bio->bi_private; - if (bio->bi_error) + if (bio->bi_status) atomic_inc(&segbuf->sb_err); bio_put(bio); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 0da0332725aa..ffe003982d95 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio) { struct o2hb_bio_wait_ctxt *wc = bio->bi_private; - if (bio->bi_error) { - mlog(ML_ERROR, "IO Error %d\n", bio->bi_error); - wc->wc_error = bio->bi_error; + if (bio->bi_status) { + mlog(ML_ERROR, "IO Error %d\n", bio->bi_status); + wc->wc_error = 
blk_status_to_errno(bio->bi_status); } o2hb_bio_wait_dec(wc, 1); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 09af0f7cd55e..76b6f988e2fa 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -276,7 +276,7 @@ xfs_end_io( struct xfs_inode *ip = XFS_I(ioend->io_inode); xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; - int error = ioend->io_bio->bi_error; + int error; /* * Just clean up the in-memory strutures if the fs has been shut down. @@ -289,6 +289,7 @@ xfs_end_io( /* * Clean up any COW blocks on an I/O error. */ + error = blk_status_to_errno(ioend->io_bio->bi_status); if (unlikely(error)) { switch (ioend->io_type) { case XFS_IO_COW: @@ -332,7 +333,7 @@ xfs_end_bio( else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); else - xfs_destroy_ioend(ioend, bio->bi_error); + xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status)); } STATIC int @@ -500,7 +501,7 @@ xfs_submit_ioend( * time. */ if (status) { - ioend->io_bio->bi_error = status; + ioend->io_bio->bi_status = errno_to_blk_status(status); bio_endio(ioend->io_bio); return status; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 62fa39276a24..15c7a484a5d2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1213,8 +1213,11 @@ xfs_buf_bio_end_io( * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. */ - if (bio->bi_error) - cmpxchg(&bp->b_io_error, 0, bio->bi_error); + if (bio->bi_status) { + int error = blk_status_to_errno(bio->bi_status); + + cmpxchg(&bp->b_io_error, 0, error); + } if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); diff --git a/include/linux/bio.h b/include/linux/bio.h index d1b04b0e99cf..9455aada1399 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -414,7 +414,7 @@ extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 59378939a8cd..dcd45b15a3a5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -33,6 +33,9 @@ typedef u8 __bitwise blk_status_t; #define BLK_STS_RESOURCE ((__force blk_status_t)9) #define BLK_STS_IOERR ((__force blk_status_t)10) +/* hack for device mapper, don't use elsewhere: */ +#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) + struct blk_issue_stat { u64 stat; }; @@ -44,7 +47,7 @@ struct blk_issue_stat { struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - int bi_error; + blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. 
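With bi_status in place in struct bio, the two helpers used throughout these hunks do the translation at the edges. A hedged sketch of the round trip (the authoritative mapping table lives in the block core; BLK_STS_OK is 0, so truth tests on bi_status keep working unchanged):

	/* errno -> blk_status_t, as the xfs and btrfs hunks above use it */
	blk_status_t sts = errno_to_blk_status(-EIO);	/* BLK_STS_IOERR */

	/* blk_status_t -> errno, as the ext4 and f2fs hunks above use it */
	int err = blk_status_to_errno(BLK_STS_IOERR);	/* -EIO */

Per the t10-pi fixup later in this series, BLK_STS_PROTECTION pairs with -EILSEQ in the same way.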
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2a8871638453..76b6df862a12 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1782,7 +1782,7 @@ struct blk_integrity_iter { const char *disk_name; }; -typedef int (integrity_processing_fn) (struct blk_integrity_iter *); +typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); struct blk_integrity_profile { integrity_processing_fn *generate_fn; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 5de5c53251ec..456da5017b32 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -72,7 +72,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone); * 2 : The target wants to push back the io */ typedef int (*dm_endio_fn) (struct dm_target *ti, - struct bio *bio, int *error); + struct bio *bio, blk_status_t *error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, blk_status_t error, union map_info *map_context); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index f80fd33639e0..57d22571f306 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev; struct hib_bio_batch { atomic_t count; wait_queue_head_t wait; - int error; + blk_status_t error; }; static void hib_init_batch(struct hib_bio_batch *hb) { atomic_set(&hb->count, 0); init_waitqueue_head(&hb->wait); - hb->error = 0; + hb->error = BLK_STS_OK; } static void hib_end_io(struct bio *bio) @@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio) struct hib_bio_batch *hb = bio->bi_private; struct page *page = bio->bi_io_vec[0].bv_page; - if (bio->bi_error) { + if (bio->bi_status) { printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", imajor(bio->bi_bdev->bd_inode), iminor(bio->bi_bdev->bd_inode), @@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio) flush_icache_range((unsigned long)page_address(page), (unsigned long)page_address(page) + PAGE_SIZE); - if (bio->bi_error && !hb->error) - hb->error = bio->bi_error; + if (bio->bi_status && !hb->error) + hb->error = bio->bi_status; if (atomic_dec_and_test(&hb->count)) wake_up(&hb->wait); @@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, return error; } -static int hib_wait_io(struct hib_bio_batch *hb) +static blk_status_t hib_wait_io(struct hib_bio_batch *hb) { wait_event(hb->wait, atomic_read(&hb->count) == 0); - return hb->error; + return blk_status_to_errno(hb->error); } /* diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 193c5f5e3f79..bc364f86100a 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, - BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu), + BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), &rpdu); } } @@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error, + bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, sizeof(r), &r); } diff --git a/mm/page_io.c b/mm/page_io.c index 23f6d0d3470f..2da71e627812 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio) { struct page *page = bio->bi_io_vec[0].bv_page; - if (bio->bi_error) { + if 
(bio->bi_status) { SetPageError(page); /* * We failed to write the page out to swap-space. @@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio) { struct page *page = bio->bi_io_vec[0].bv_page; - if (bio->bi_error) { + if (bio->bi_status) { SetPageError(page); ClearPageUptodate(page); pr_alert("Read-error on swap-device (%u:%u:%llu)\n", -- cgit v1.2.3 From f06345add95f388519e83ec398134853e0f64ac9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 12 Jun 2017 11:22:46 -0600 Subject: blk-mq: fixup type of 'ret' in __blk_mq_try_issue_directly() Should be a blk_status_t type, not an integer. Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index da2f21961525..359d2dc0d414 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1406,7 +1406,7 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, .last = true, }; blk_qc_t new_cookie; - int ret; + blk_status_t ret; bool run_queue = true; if (blk_mq_hctx_stopped(hctx)) { -- cgit v1.2.3 From a462b950834945db22f94aa98181f861eff0574d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 13 Jun 2017 08:07:33 -0700 Subject: block: Dedicated error code fixups This patch fixes two sparse warnings introduced by the "dedicated error codes for the block layer V3" patch series. These changes have not been tested. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 4 ++-- block/t10-pi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 7a5c8ed27f42..0e36ca5407b5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1817,8 +1817,8 @@ again: } if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), - bio, bio->bi_status); + trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, + blk_status_to_errno(bio->bi_status)); bio_clear_flag(bio, BIO_TRACE_COMPLETION); } diff --git a/block/t10-pi.c b/block/t10-pi.c index 350b3cbcf9e5..3416dadf7b15 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -91,7 +91,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, "(rcvd %u)\n", iter->disk_name, (unsigned long long) iter->seed, be32_to_cpu(pi->ref_tag)); - return -EILSEQ; + return BLK_STS_PROTECTION; } break; case 3: -- cgit v1.2.3 From 6e15cf2a0bc1a75237ed8ae6293db707e471bb81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:18 +0200 Subject: blk-mq: mark blk_mq_rq_ctx_init static Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 ++--- block/blk-mq.h | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 359d2dc0d414..e1d650804c8e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -204,8 +204,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) } EXPORT_SYMBOL(blk_mq_can_queue); -void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, unsigned int op) +static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, + struct request *rq, unsigned int op) { INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ @@ -243,7 +243,6 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, ctx->rq_dispatched[op_is_sync(op)]++; } -EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init); struct request 
*__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) diff --git a/block/blk-mq.h b/block/blk-mq.h index cc67b48e3551..806fed53f607 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -131,8 +131,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data /* * Internal helpers for request allocation/init/free */ -void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, unsigned int op); void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq); void blk_mq_finish_request(struct request *rq); -- cgit v1.2.3 From d2c0d3832469b947ca158e8977e66e8e2e64d8dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:19 +0200 Subject: blk-mq: move blk_mq_sched_{get,put}_request to blk-mq.c Having them out of line in blk-mq-sched.c just makes the code flow unnecessarily complicated. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 69 ++-------------------------------------------------- block/blk-mq-sched.h | 4 +-- block/blk-mq.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 73 deletions(-) (limited to 'block') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c4e2afb9d12d..62db188595dc 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -58,8 +58,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, rq->elv.icq = NULL; } -static void blk_mq_sched_assign_ioc(struct request_queue *q, - struct request *rq, struct bio *bio) +void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, + struct bio *bio) { struct io_context *ioc; @@ -68,71 +68,6 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, __blk_mq_sched_assign_ioc(q, rq, bio, ioc); } -struct request *blk_mq_sched_get_request(struct request_queue *q, - struct bio *bio, - unsigned int op, - struct blk_mq_alloc_data *data) -{ - struct elevator_queue *e = q->elevator; - struct request *rq; - - blk_queue_enter_live(q); - data->q = q; - if (likely(!data->ctx)) - data->ctx = blk_mq_get_ctx(q); - if (likely(!data->hctx)) - data->hctx = blk_mq_map_queue(q, data->ctx->cpu); - - if (e) { - data->flags |= BLK_MQ_REQ_INTERNAL; - - /* - * Flush requests are special and go directly to the - * dispatch list. 
- */ - if (!op_is_flush(op) && e->type->ops.mq.get_request) { - rq = e->type->ops.mq.get_request(q, op, data); - if (rq) - rq->rq_flags |= RQF_QUEUED; - } else - rq = __blk_mq_alloc_request(data, op); - } else { - rq = __blk_mq_alloc_request(data, op); - } - - if (rq) { - if (!op_is_flush(op)) { - rq->elv.icq = NULL; - if (e && e->type->icq_cache) - blk_mq_sched_assign_ioc(q, rq, bio); - } - data->hctx->queued++; - return rq; - } - - blk_queue_exit(q); - return NULL; -} - -void blk_mq_sched_put_request(struct request *rq) -{ - struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; - - if (rq->rq_flags & RQF_ELVPRIV) { - blk_mq_sched_put_rq_priv(rq->q, rq); - if (rq->elv.icq) { - put_io_context(rq->elv.icq->ioc); - rq->elv.icq = NULL; - } - } - - if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request) - e->type->ops.mq.put_request(rq); - else - blk_mq_finish_request(rq); -} - void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index b87e5be5db8c..5d12529538d0 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -7,8 +7,8 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, void (*exit)(struct blk_mq_hw_ctx *)); -struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data); -void blk_mq_sched_put_request(struct request *rq); +void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, + struct bio *bio); void blk_mq_sched_request_inserted(struct request *rq); bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, diff --git a/block/blk-mq.c b/block/blk-mq.c index e1d650804c8e..694cbd698507 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -277,6 +277,51 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, } EXPORT_SYMBOL_GPL(__blk_mq_alloc_request); +static struct request *blk_mq_get_request(struct request_queue *q, + struct bio *bio, unsigned int op, + struct blk_mq_alloc_data *data) +{ + struct elevator_queue *e = q->elevator; + struct request *rq; + + blk_queue_enter_live(q); + data->q = q; + if (likely(!data->ctx)) + data->ctx = blk_mq_get_ctx(q); + if (likely(!data->hctx)) + data->hctx = blk_mq_map_queue(q, data->ctx->cpu); + + if (e) { + data->flags |= BLK_MQ_REQ_INTERNAL; + + /* + * Flush requests are special and go directly to the + * dispatch list. 
+ */ + if (!op_is_flush(op) && e->type->ops.mq.get_request) { + rq = e->type->ops.mq.get_request(q, op, data); + if (rq) + rq->rq_flags |= RQF_QUEUED; + } else + rq = __blk_mq_alloc_request(data, op); + } else { + rq = __blk_mq_alloc_request(data, op); + } + + if (rq) { + if (!op_is_flush(op)) { + rq->elv.icq = NULL; + if (e && e->type->icq_cache) + blk_mq_sched_assign_ioc(q, rq, bio); + } + data->hctx->queued++; + return rq; + } + + blk_queue_exit(q); + return NULL; +} + struct request *blk_mq_alloc_request(struct request_queue *q, int rw, unsigned int flags) { @@ -288,7 +333,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, if (ret) return ERR_PTR(ret); - rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); + rq = blk_mq_get_request(q, NULL, rw, &alloc_data); blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); @@ -339,7 +384,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, cpu = cpumask_first(alloc_data.hctx->cpumask); alloc_data.ctx = __blk_mq_get_ctx(q, cpu); - rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); + rq = blk_mq_get_request(q, NULL, rw, &alloc_data); blk_queue_exit(q); @@ -389,7 +434,21 @@ EXPORT_SYMBOL_GPL(blk_mq_finish_request); void blk_mq_free_request(struct request *rq) { - blk_mq_sched_put_request(rq); + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + + if (rq->rq_flags & RQF_ELVPRIV) { + blk_mq_sched_put_rq_priv(rq->q, rq); + if (rq->elv.icq) { + put_io_context(rq->elv.icq->ioc); + rq->elv.icq = NULL; + } + } + + if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request) + e->type->ops.mq.put_request(rq); + else + blk_mq_finish_request(rq); } EXPORT_SYMBOL_GPL(blk_mq_free_request); @@ -1494,7 +1553,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) trace_block_getrq(q, bio, bio->bi_opf); - rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data); + rq = blk_mq_get_request(q, bio, bio->bi_opf, &data); if (unlikely(!rq)) { __wbt_done(q->rq_wb, wb_acct); return BLK_QC_T_NONE; -- cgit v1.2.3 From ea511e3c28c892f689173c91662437c4ddb2ab38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:20 +0200 Subject: blk-mq: remove blk_mq_sched_{get,put}_rq_priv Having these as separate helpers in a header really does not help readability, or my chances to refactor this code sanely. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 10 ++++++---- block/blk-mq-sched.h | 21 --------------------- block/blk-mq.c | 3 ++- 3 files changed, 8 insertions(+), 26 deletions(-) (limited to 'block') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 62db188595dc..22601e5c6f19 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -36,6 +36,7 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, struct bio *bio, struct io_context *ioc) { + struct elevator_queue *e = q->elevator; struct io_cq *icq; spin_lock_irq(q->queue_lock); @@ -49,13 +50,14 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, } rq->elv.icq = icq; - if (!blk_mq_sched_get_rq_priv(q, rq, bio)) { - rq->rq_flags |= RQF_ELVPRIV; - get_io_context(icq->ioc); + if (e && e->type->ops.mq.get_rq_priv && + e->type->ops.mq.get_rq_priv(q, rq, bio)) { + rq->elv.icq = NULL; return; } - rq->elv.icq = NULL; + rq->rq_flags |= RQF_ELVPRIV; + get_io_context(icq->ioc); } void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 5d12529538d0..f34e6a522105 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -44,27 +44,6 @@ blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) return __blk_mq_sched_bio_merge(q, bio); } -static inline int blk_mq_sched_get_rq_priv(struct request_queue *q, - struct request *rq, - struct bio *bio) -{ - struct elevator_queue *e = q->elevator; - - if (e && e->type->ops.mq.get_rq_priv) - return e->type->ops.mq.get_rq_priv(q, rq, bio); - - return 0; -} - -static inline void blk_mq_sched_put_rq_priv(struct request_queue *q, - struct request *rq) -{ - struct elevator_queue *e = q->elevator; - - if (e && e->type->ops.mq.put_rq_priv) - e->type->ops.mq.put_rq_priv(q, rq); -} - static inline bool blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, struct bio *bio) diff --git a/block/blk-mq.c b/block/blk-mq.c index 694cbd698507..1a45c287db64 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -438,7 +438,8 @@ void blk_mq_free_request(struct request *rq) struct elevator_queue *e = q->elevator; if (rq->rq_flags & RQF_ELVPRIV) { - blk_mq_sched_put_rq_priv(rq->q, rq); + if (e && e->type->ops.mq.put_rq_priv) + e->type->ops.mq.put_rq_priv(q, rq); if (rq->elv.icq) { put_io_context(rq->elv.icq->ioc); rq->elv.icq = NULL; -- cgit v1.2.3 From 7b9e93616399638521aafd1f01dfcf474c736393 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:21 +0200 Subject: blk-mq-sched: unify request finished methods No need to have two different callouts of bfq vs kyber. 
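To make the unification concrete, here is a sketch of what a scheduler now wires up; mysched is a hypothetical elevator invented for illustration, not code from this patch. One finish_request callback takes over both the bfq put_rq_priv role and the kyber put_request role, and the core performs the actual request free itself:

static void mysched_finish_request(struct request *rq)
{
	/* drop per-request scheduler state; no blk_mq_finish_request()
	 * call here any more - blk_mq_free_request() handles the free */
	kfree(rq->elv.priv[0]);
	rq->elv.priv[0] = NULL;
}

static struct elevator_type mysched = {
	.ops.mq = {
		.finish_request	= mysched_finish_request,
		/* .insert_requests, .dispatch_request, ... */
	},
	.elevator_name	= "mysched",
	.elevator_owner	= THIS_MODULE,
};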
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 6 +++--- block/blk-mq.c | 11 ++++------- block/kyber-iosched.c | 8 +++----- include/linux/elevator.h | 3 +-- 4 files changed, 11 insertions(+), 17 deletions(-) (limited to 'block') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index ed93da2462ab..4f69e39c2f89 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4290,7 +4290,7 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void bfq_put_rq_private(struct request_queue *q, struct request *rq) +static void bfq_finish_request(struct request *rq) { struct bfq_queue *bfqq = RQ_BFQQ(rq); struct bfq_data *bfqd = bfqq->bfqd; @@ -4324,7 +4324,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq) */ if (!RB_EMPTY_NODE(&rq->rb_node)) - bfq_remove_request(q, rq); + bfq_remove_request(rq->q, rq); bfq_put_rq_priv_body(bfqq); } @@ -4951,7 +4951,7 @@ static struct elv_fs_entry bfq_attrs[] = { static struct elevator_type iosched_bfq_mq = { .ops.mq = { .get_rq_priv = bfq_get_rq_private, - .put_rq_priv = bfq_put_rq_private, + .finish_request = bfq_finish_request, .exit_icq = bfq_exit_icq, .insert_requests = bfq_insert_requests, .dispatch_request = bfq_dispatch_request, diff --git a/block/blk-mq.c b/block/blk-mq.c index 1a45c287db64..9df7e0394a48 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -437,19 +437,16 @@ void blk_mq_free_request(struct request *rq) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (rq->rq_flags & RQF_ELVPRIV) { - if (e && e->type->ops.mq.put_rq_priv) - e->type->ops.mq.put_rq_priv(q, rq); + if (rq->rq_flags & (RQF_ELVPRIV | RQF_QUEUED)) { + if (e && e->type->ops.mq.finish_request) + e->type->ops.mq.finish_request(rq); if (rq->elv.icq) { put_io_context(rq->elv.icq->ioc); rq->elv.icq = NULL; } } - if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request) - e->type->ops.mq.put_request(rq); - else - blk_mq_finish_request(rq); + blk_mq_finish_request(rq); } EXPORT_SYMBOL_GPL(blk_mq_free_request); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index b9faabc75fdb..2557b399f0a8 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -446,13 +446,11 @@ static struct request *kyber_get_request(struct request_queue *q, return rq; } -static void kyber_put_request(struct request *rq) +static void kyber_finish_request(struct request *rq) { - struct request_queue *q = rq->q; - struct kyber_queue_data *kqd = q->elevator->elevator_data; + struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; rq_clear_domain_token(kqd, rq); - blk_mq_finish_request(rq); } static void kyber_completed_request(struct request *rq) @@ -816,7 +814,7 @@ static struct elevator_type kyber_sched = { .init_hctx = kyber_init_hctx, .exit_hctx = kyber_exit_hctx, .get_request = kyber_get_request, - .put_request = kyber_put_request, + .finish_request = kyber_finish_request, .completed_request = kyber_completed_request, .dispatch_request = kyber_dispatch_request, .has_work = kyber_has_work, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 0e306c5a86d6..4acea351d43f 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -105,7 +105,7 @@ struct elevator_mq_ops { void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); struct request *(*get_request)(struct request_queue *, unsigned int, struct 
blk_mq_alloc_data *); - void (*put_request)(struct request *); + void (*finish_request)(struct request *); void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); @@ -115,7 +115,6 @@ struct elevator_mq_ops { struct request *(*former_request)(struct request_queue *, struct request *); struct request *(*next_request)(struct request_queue *, struct request *); int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *); - void (*put_rq_priv)(struct request_queue *, struct request *); void (*init_icq)(struct io_cq *); void (*exit_icq)(struct io_cq *); }; -- cgit v1.2.3 From 6af54051a07041d8d4e36b1b01136a0db4eb7e23 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:22 +0200 Subject: blk-mq: simplify blk_mq_free_request Merge three functions only tail-called by blk_mq_free_request into blk_mq_free_request. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 50 +++++++++++++++----------------------------------- block/blk-mq.h | 3 --- 2 files changed, 15 insertions(+), 38 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 9df7e0394a48..0b17351fccfc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -395,12 +395,24 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - struct request *rq) +void blk_mq_free_request(struct request *rq) { - const int sched_tag = rq->internal_tag; struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + const int sched_tag = rq->internal_tag; + if (rq->rq_flags & (RQF_ELVPRIV | RQF_QUEUED)) { + if (e && e->type->ops.mq.finish_request) + e->type->ops.mq.finish_request(rq); + if (rq->elv.icq) { + put_io_context(rq->elv.icq->ioc); + rq->elv.icq = NULL; + } + } + + ctx->rq_completed[rq_is_sync(rq)]++; if (rq->rq_flags & RQF_MQ_INFLIGHT) atomic_dec(&hctx->nr_active); @@ -416,38 +428,6 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, blk_mq_sched_restart(hctx); blk_queue_exit(q); } - -static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - struct blk_mq_ctx *ctx = rq->mq_ctx; - - ctx->rq_completed[rq_is_sync(rq)]++; - __blk_mq_finish_request(hctx, ctx, rq); -} - -void blk_mq_finish_request(struct request *rq) -{ - blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq); -} -EXPORT_SYMBOL_GPL(blk_mq_finish_request); - -void blk_mq_free_request(struct request *rq) -{ - struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; - - if (rq->rq_flags & (RQF_ELVPRIV | RQF_QUEUED)) { - if (e && e->type->ops.mq.finish_request) - e->type->ops.mq.finish_request(rq); - if (rq->elv.icq) { - put_io_context(rq->elv.icq->ioc); - rq->elv.icq = NULL; - } - } - - blk_mq_finish_request(rq); -} EXPORT_SYMBOL_GPL(blk_mq_free_request); inline void __blk_mq_end_request(struct request *rq, blk_status_t error) diff --git a/block/blk-mq.h b/block/blk-mq.h index 806fed53f607..6a509a8eb3fb 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -131,9 +131,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data /* * Internal helpers for request allocation/init/free */ -void 
__blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - struct request *rq); -void blk_mq_finish_request(struct request *rq); struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op); -- cgit v1.2.3 From 037cebb85b94027a52be69d72068e6f6d0dca3a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:23 +0200 Subject: blk-mq: streamline blk_mq_get_request Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 0b17351fccfc..e056725679a8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -302,24 +302,24 @@ static struct request *blk_mq_get_request(struct request_queue *q, rq = e->type->ops.mq.get_request(q, op, data); if (rq) rq->rq_flags |= RQF_QUEUED; - } else - rq = __blk_mq_alloc_request(data, op); - } else { - rq = __blk_mq_alloc_request(data, op); + goto allocated; + } } - if (rq) { - if (!op_is_flush(op)) { - rq->elv.icq = NULL; - if (e && e->type->icq_cache) - blk_mq_sched_assign_ioc(q, rq, bio); - } - data->hctx->queued++; - return rq; + rq = __blk_mq_alloc_request(data, op); +allocated: + if (!rq) { + blk_queue_exit(q); + return NULL; } - blk_queue_exit(q); - return NULL; + if (!op_is_flush(op)) { + rq->elv.icq = NULL; + if (e && e->type->icq_cache) + blk_mq_sched_assign_ioc(q, rq, bio); + } + data->hctx->queued++; + return rq; } struct request *blk_mq_alloc_request(struct request_queue *q, int rw, -- cgit v1.2.3 From 9f2107382636cf9a71951eb71ec04f2fb3641b37 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:24 +0200 Subject: bfq-iosched: fix NULL ioc check in bfq_get_rq_private icq_to_bic is a container_of operation, so we need to check for NULL before it. Also move the check outside the spinlock while we're at it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4f69e39c2f89..f037b005faa1 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4398,16 +4398,17 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq, struct bio *bio) { struct bfq_data *bfqd = q->elevator->elevator_data; - struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); + struct bfq_io_cq *bic; const int is_sync = rq_is_sync(rq); struct bfq_queue *bfqq; bool new_queue = false; bool split = false; - spin_lock_irq(&bfqd->lock); + if (!rq->elv.icq) + return 1; + bic = icq_to_bic(rq->elv.icq); - if (!bic) - goto queue_fail; + spin_lock_irq(&bfqd->lock); bfq_check_ioprio_change(bic, bio); @@ -4465,13 +4466,7 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq, bfq_handle_burst(bfqd, bfqq); spin_unlock_irq(&bfqd->lock); - return 0; - -queue_fail: - spin_unlock_irq(&bfqd->lock); - - return 1; } static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) -- cgit v1.2.3 From 44e8c2bff80bb384a608406009948f90a78bf8a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:25 +0200 Subject: blk-mq: refactor blk_mq_sched_assign_ioc blk_mq_sched_assign_ioc now only handles the assignment of the ioc if the scheduler needs it (bfq only at the moment). The call to the per-request initializer is moved out so that it can be merged with a similar call for the kyber I/O scheduler.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 28 ++++------------------ block/blk-mq-sched.h | 3 +-- block/blk-mq.c | 14 ++++++++++++-- 3 files changed, 17 insertions(+), 28 deletions(-) (limited to 'block') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 22601e5c6f19..254d1c164567 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -31,12 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); -static void __blk_mq_sched_assign_ioc(struct request_queue *q, - struct request *rq, - struct bio *bio, - struct io_context *ioc) +void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio) { - struct elevator_queue *e = q->elevator; + struct request_queue *q = rq->q; + struct io_context *ioc = rq_ioc(bio); struct io_cq *icq; spin_lock_irq(q->queue_lock); @@ -48,26 +46,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q, if (!icq) return; } - - rq->elv.icq = icq; - if (e && e->type->ops.mq.get_rq_priv && - e->type->ops.mq.get_rq_priv(q, rq, bio)) { - rq->elv.icq = NULL; - return; - } - - rq->rq_flags |= RQF_ELVPRIV; - get_io_context(icq->ioc); -} - -void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, - struct bio *bio) -{ - struct io_context *ioc; - - ioc = rq_ioc(bio); - if (ioc) - __blk_mq_sched_assign_ioc(q, rq, bio, ioc); + get_io_context(icq->ioc); + rq->elv.icq = icq; } void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index f34e6a522105..e117edd039b1 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -7,8 +7,7 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, void (*exit)(struct blk_mq_hw_ctx *)); -void blk_mq_sched_assign_ioc(struct request_queue *q, struct request *rq, - struct bio *bio); +void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio); void blk_mq_sched_request_inserted(struct request *rq); bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, diff --git a/block/blk-mq.c b/block/blk-mq.c index e056725679a8..2f380ab7a603 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -315,8 +315,18 @@ allocated: if (!op_is_flush(op)) { rq->elv.icq = NULL; - if (e && e->type->icq_cache) - blk_mq_sched_assign_ioc(q, rq, bio); + if (e && e->type->ops.mq.get_rq_priv) { + if (e->type->icq_cache && rq_ioc(bio)) + blk_mq_sched_assign_ioc(rq, bio); + + if (e->type->ops.mq.get_rq_priv(q, rq, bio)) { + if (rq->elv.icq) + put_io_context(rq->elv.icq->ioc); + rq->elv.icq = NULL; + } else { + rq->rq_flags |= RQF_ELVPRIV; + } + } } data->hctx->queued++; return rq; -- cgit v1.2.3 From 5bbf4e5a8e3a780874b2ed77bd1bd57850f3f6da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:26 +0200 Subject: blk-mq-sched: unify request prepare methods This patch makes sure we always allocate requests in the core blk-mq code and use a common prepare_request method to initialize them for both mq I/O schedulers. For Kyber, an additional limit_depth method is added that is called before allocating the request. Also, because none of the initializations can really fail, the new method does not return an error - instead the bfq finish method is hardened to deal with the no-IOC case. Last but not least this removes the abuse of RQF_QUEUED by the blk-mq scheduling code as RQF_ELVPRIV is all that is needed now.
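Putting the series together, the per-scheduler surface now consists of three optional hooks. A sketch for a hypothetical scheduler (mysched_* names are invented; kyber's real implementation is in the diff below) might look like:

static void mysched_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
	/* called before tag allocation; only trims the sbitmap depth */
	if (!op_is_sync(op))
		data->shallow_depth = 16;	/* throttle async I/O */
}

static void mysched_prepare_request(struct request *rq, struct bio *bio)
{
	/* cannot fail; attach per-request scheduler state */
	rq->elv.priv[0] = NULL;
}

static void mysched_finish_request(struct request *rq)
{
	/* undo prepare_request when the request is freed */
}

static struct elevator_type mysched = {
	.ops.mq = {
		.limit_depth		= mysched_limit_depth,
		.prepare_request	= mysched_prepare_request,
		.finish_request		= mysched_finish_request,
		/* ... */
	},
};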
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 19 ++++++++++++------- block/blk-mq.c | 22 ++++++---------------- block/kyber-iosched.c | 23 +++++++++++------------ include/linux/elevator.h | 4 ++-- 4 files changed, 31 insertions(+), 37 deletions(-) (limited to 'block') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f037b005faa1..60d32700f104 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4292,8 +4292,14 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq) static void bfq_finish_request(struct request *rq) { - struct bfq_queue *bfqq = RQ_BFQQ(rq); - struct bfq_data *bfqd = bfqq->bfqd; + struct bfq_queue *bfqq; + struct bfq_data *bfqd; + + if (!rq->elv.icq) + return; + + bfqq = RQ_BFQQ(rq); + bfqd = bfqq->bfqd; if (rq->rq_flags & RQF_STARTED) bfqg_stats_update_completion(bfqq_group(bfqq), @@ -4394,9 +4400,9 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, /* * Allocate bfq data structures associated with this request. */ -static int bfq_get_rq_private(struct request_queue *q, struct request *rq, - struct bio *bio) +static void bfq_prepare_request(struct request *rq, struct bio *bio) { + struct request_queue *q = rq->q; struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_io_cq *bic; const int is_sync = rq_is_sync(rq); @@ -4405,7 +4411,7 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq, bool split = false; if (!rq->elv.icq) - return 1; + return; bic = icq_to_bic(rq->elv.icq); spin_lock_irq(&bfqd->lock); @@ -4466,7 +4472,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq, bfq_handle_burst(bfqd, bfqq); spin_unlock_irq(&bfqd->lock); - return 0; } static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) @@ -4945,7 +4950,7 @@ static struct elv_fs_entry bfq_attrs[] = { static struct elevator_type iosched_bfq_mq = { .ops.mq = { - .get_rq_priv = bfq_get_rq_private, + .prepare_request = bfq_prepare_request, .finish_request = bfq_finish_request, .exit_icq = bfq_exit_icq, .insert_requests = bfq_insert_requests, diff --git a/block/blk-mq.c b/block/blk-mq.c index 2f380ab7a603..81d05c19d4b3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -298,16 +298,11 @@ static struct request *blk_mq_get_request(struct request_queue *q, * Flush requests are special and go directly to the * dispatch list. 
*/ - if (!op_is_flush(op) && e->type->ops.mq.get_request) { - rq = e->type->ops.mq.get_request(q, op, data); - if (rq) - rq->rq_flags |= RQF_QUEUED; - goto allocated; - } + if (!op_is_flush(op) && e->type->ops.mq.limit_depth) + e->type->ops.mq.limit_depth(op, data); } rq = __blk_mq_alloc_request(data, op); -allocated: if (!rq) { blk_queue_exit(q); return NULL; @@ -315,17 +310,12 @@ allocated: if (!op_is_flush(op)) { rq->elv.icq = NULL; - if (e && e->type->ops.mq.get_rq_priv) { + if (e && e->type->ops.mq.prepare_request) { if (e->type->icq_cache && rq_ioc(bio)) blk_mq_sched_assign_ioc(rq, bio); - if (e->type->ops.mq.get_rq_priv(q, rq, bio)) { - if (rq->elv.icq) - put_io_context(rq->elv.icq->ioc); - rq->elv.icq = NULL; - } else { - rq->rq_flags |= RQF_ELVPRIV; - } + e->type->ops.mq.prepare_request(rq, bio); + rq->rq_flags |= RQF_ELVPRIV; } } data->hctx->queued++; @@ -413,7 +403,7 @@ void blk_mq_free_request(struct request *rq) struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); const int sched_tag = rq->internal_tag; - if (rq->rq_flags & (RQF_ELVPRIV | RQF_QUEUED)) { + if (rq->rq_flags & RQF_ELVPRIV) { if (e && e->type->ops.mq.finish_request) e->type->ops.mq.finish_request(rq); if (rq->elv.icq) { diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 2557b399f0a8..a9f6fd3fab8e 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -426,24 +426,22 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd, } } -static struct request *kyber_get_request(struct request_queue *q, - unsigned int op, - struct blk_mq_alloc_data *data) +static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) { - struct kyber_queue_data *kqd = q->elevator->elevator_data; - struct request *rq; - /* * We use the scheduler tags as per-hardware queue queueing tokens. * Async requests can be limited at this stage. 
*/ - if (!op_is_sync(op)) + if (!op_is_sync(op)) { + struct kyber_queue_data *kqd = data->q->elevator->elevator_data; + data->shallow_depth = kqd->async_depth; + } +} - rq = __blk_mq_alloc_request(data, op); - if (rq) - rq_set_domain_token(rq, -1); - return rq; +static void kyber_prepare_request(struct request *rq, struct bio *bio) +{ + rq_set_domain_token(rq, -1); } static void kyber_finish_request(struct request *rq) @@ -813,7 +811,8 @@ static struct elevator_type kyber_sched = { .exit_sched = kyber_exit_sched, .init_hctx = kyber_init_hctx, .exit_hctx = kyber_exit_hctx, - .get_request = kyber_get_request, + .limit_depth = kyber_limit_depth, + .prepare_request = kyber_prepare_request, .finish_request = kyber_finish_request, .completed_request = kyber_completed_request, .dispatch_request = kyber_dispatch_request, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4acea351d43f..5bc8f8682a3e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -104,7 +104,8 @@ struct elevator_mq_ops { int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); - struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); + void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *); + void (*prepare_request)(struct request *, struct bio *bio); void (*finish_request)(struct request *); void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); @@ -114,7 +115,6 @@ struct elevator_mq_ops { void (*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); struct request *(*next_request)(struct request_queue *, struct request *); - int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *); void (*init_icq)(struct io_cq *); void (*exit_icq)(struct io_cq *); }; -- cgit v1.2.3 From e4cdf1a1cb161a648cc1ed7d6148fc3b99a1b3f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:27 +0200 Subject: blk-mq: remove __blk_mq_alloc_request Move most code into blk_mq_rq_ctx_init, and the rest into blk_mq_get_request. 
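The consolidation is purely internal; the exported allocation interface is unchanged. A caller-side sketch (hypothetical driver code, mydrv_* invented for illustration) still goes through the same pair:

static int mydrv_send_probe(struct request_queue *q)
{
	struct request *rq;

	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* ... fill in the payload and issue it, e.g. via blk_execute_rq() ... */

	blk_mq_free_request(rq);
	return 0;
}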
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 68 +++++++++++++++++++++++----------------------------------- block/blk-mq.h | 6 ------ 2 files changed, 27 insertions(+), 47 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 81d05c19d4b3..be40c1d6e3a4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -204,15 +204,31 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) } EXPORT_SYMBOL(blk_mq_can_queue); -static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, unsigned int op) +static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + unsigned int tag, unsigned int op) { + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct request *rq = tags->static_rqs[tag]; + + if (data->flags & BLK_MQ_REQ_INTERNAL) { + rq->tag = -1; + rq->internal_tag = tag; + } else { + if (blk_mq_tag_busy(data->hctx)) { + rq->rq_flags = RQF_MQ_INFLIGHT; + atomic_inc(&data->hctx->nr_active); + } + rq->tag = tag; + rq->internal_tag = -1; + data->hctx->tags->rqs[rq->tag] = rq; + } + INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ - rq->q = q; - rq->mq_ctx = ctx; + rq->q = data->q; + rq->mq_ctx = data->ctx; rq->cmd_flags = op; - if (blk_queue_io_stat(q)) + if (blk_queue_io_stat(data->q)) rq->rq_flags |= RQF_IO_STAT; /* do not touch atomic flags, it needs atomic ops against the timer */ rq->cpu = -1; @@ -241,41 +257,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->end_io_data = NULL; rq->next_rq = NULL; - ctx->rq_dispatched[op_is_sync(op)]++; -} - -struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, - unsigned int op) -{ - struct request *rq; - unsigned int tag; - - tag = blk_mq_get_tag(data); - if (tag != BLK_MQ_TAG_FAIL) { - struct blk_mq_tags *tags = blk_mq_tags_from_data(data); - - rq = tags->static_rqs[tag]; - - if (data->flags & BLK_MQ_REQ_INTERNAL) { - rq->tag = -1; - rq->internal_tag = tag; - } else { - if (blk_mq_tag_busy(data->hctx)) { - rq->rq_flags = RQF_MQ_INFLIGHT; - atomic_inc(&data->hctx->nr_active); - } - rq->tag = tag; - rq->internal_tag = -1; - data->hctx->tags->rqs[rq->tag] = rq; - } - - blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); - return rq; - } - - return NULL; + data->ctx->rq_dispatched[op_is_sync(op)]++; + return rq; } -EXPORT_SYMBOL_GPL(__blk_mq_alloc_request); static struct request *blk_mq_get_request(struct request_queue *q, struct bio *bio, unsigned int op, @@ -283,6 +267,7 @@ static struct request *blk_mq_get_request(struct request_queue *q, { struct elevator_queue *e = q->elevator; struct request *rq; + unsigned int tag; blk_queue_enter_live(q); data->q = q; @@ -302,12 +287,13 @@ static struct request *blk_mq_get_request(struct request_queue *q, e->type->ops.mq.limit_depth(op, data); } - rq = __blk_mq_alloc_request(data, op); - if (!rq) { + tag = blk_mq_get_tag(data); + if (tag == BLK_MQ_TAG_FAIL) { blk_queue_exit(q); return NULL; } + rq = blk_mq_rq_ctx_init(data, tag, op); if (!op_is_flush(op)) { rq->elv.icq = NULL; if (e && e->type->ops.mq.prepare_request) { diff --git a/block/blk-mq.h b/block/blk-mq.h index 6a509a8eb3fb..1a06fdf9fd4d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -128,12 +128,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data return data->hctx->tags; } -/* - * Internal helpers for request allocation/init/free - */ -struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, - 
unsigned int op); - static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_STOPPED, &hctx->state); -- cgit v1.2.3 From af67c31fba3b879b241536a48df703a2eee18ebf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:57 +1000 Subject: blk: remove bio_set arg from blk_queue_split() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blk_queue_split() is always called with the last arg being q->bio_split, where 'q' is the first arg. Also blk_queue_split() sometimes uses the passed-in 'bs' and sometimes uses q->bio_split. This is inconsistent and unnecessary. Remove the last arg and always use q->bio_split inside blk_queue_split() Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Credit-to: Javier González (Noticed that lightnvm was missed) Reviewed-by: Javier González Tested-by: Javier González Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-merge.c | 9 ++++----- block/blk-mq.c | 2 +- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3vram.c | 2 +- drivers/block/rsxx/dev.c | 2 +- drivers/block/umem.c | 2 +- drivers/lightnvm/pblk-init.c | 4 ++-- drivers/lightnvm/rrpc.c | 2 +- drivers/md/md.c | 2 +- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/xpram.c | 2 +- include/linux/blkdev.h | 3 +-- 14 files changed, 18 insertions(+), 20 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 8592409db272..31b5ece6b18e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1723,7 +1723,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) */ blk_queue_bounce(q, &bio); - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio->bi_status = BLK_STS_IOERR; diff --git a/block/blk-merge.c b/block/blk-merge.c index 3990ae406341..d59074556703 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -202,8 +202,7 @@ split: return do_split ? 
new : NULL; } -void blk_queue_split(struct request_queue *q, struct bio **bio, - struct bio_set *bs) +void blk_queue_split(struct request_queue *q, struct bio **bio) { struct bio *split, *res; unsigned nsegs; @@ -211,13 +210,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio, switch (bio_op(*bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: - split = blk_bio_discard_split(q, *bio, bs, &nsegs); + split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs); break; case REQ_OP_WRITE_ZEROES: - split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs); + split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs); break; case REQ_OP_WRITE_SAME: - split = blk_bio_write_same_split(q, *bio, bs, &nsegs); + split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs); break; default: split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); diff --git a/block/blk-mq.c b/block/blk-mq.c index be40c1d6e3a4..cc85de9d6b2d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1499,7 +1499,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index fca6b9914948..f6e865b2d543 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio) struct drbd_device *device = (struct drbd_device *) q->queuedata; unsigned long start_jif; - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); start_jif = jiffies; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index e8a381161db6..1f363638b453 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2414,7 +2414,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); pd = q->queuedata; if (!pd) { diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 6fa2b8197013..e0e81cacd781 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 0b0a0a902355..4e8bdfa0aa31 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -151,7 +151,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) struct rsxx_bio_meta *bio_meta; blk_status_t st = BLK_STS_IOERR; - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); might_sleep(); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 4b3c947697b1..0677d2514665 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size); - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); spin_lock_irq(&card->lock); *card->biotail = bio; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index ae8cd6d5af8b..b3fec8ec55b8 100644 --- 
a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -33,7 +33,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, * constraint. Writes can be of arbitrary size. */ if (bio_data_dir(bio) == READ) { - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); ret = pblk_submit_read(pblk, bio); if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) bio_put(bio); @@ -46,7 +46,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, * available for user I/O. */ if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); } diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 8d3b53bb3307..267f01ae87e4 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) struct nvm_rq *rqd; int err; - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); if (bio_op(bio) == REQ_OP_DISCARD) { rrpc_discard(rrpc, bio); diff --git a/drivers/md/md.c b/drivers/md/md.c index 6d493b54d56c..d43df1176c23 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) unsigned int sectors; int cpu; - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); if (mddev == NULL || mddev->pers == NULL) { bio_io_error(bio); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 36e5280af3e4..06eb1de52d1c 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) unsigned long source_addr; unsigned long bytes_done; - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index b9d7e755c8a3..a48f0d40c1d2 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio) unsigned long page_addr; unsigned long bytes; - blk_queue_split(q, &bio, q->bio_split); + blk_queue_split(q, &bio); if ((bio->bi_iter.bi_sector & 7) != 0 || (bio->bi_iter.bi_size & 4095) != 0) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76b6df862a12..670df402bc51 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -944,8 +944,7 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_delay_queue(struct request_queue *, unsigned long); -extern void blk_queue_split(struct request_queue *, struct bio **, - struct bio_set *); +extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, -- cgit v1.2.3 From 011067b05668b05aae88e5a24cff0ca0a67ca0b0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:57 +1000 Subject: blk: replace bioset_create_nobvec() with a flags arg to bioset_create() "flags" arguments are often seen as good API design as they allow easy extensibility. 
bioset_create_nobvec() is implemented internally as a variation in flags passed to __bioset_create(). To support future extension, make the internal structure part of the API, i.e. add a 'flags' argument to bioset_create() and discard bioset_create_nobvec(). Note that the bio_split allocations in drivers/md/raid* do not need the bvec mempool - they should have used bioset_create_nobvec(). Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/bio.c | 60 ++++++++++++++----------------------- block/blk-core.c | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/md/bcache/super.c | 4 +-- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-io.c | 2 +- drivers/md/dm.c | 2 +- drivers/md/md.c | 2 +- drivers/md/raid1.c | 2 +- drivers/md/raid10.c | 2 +- drivers/md/raid5-cache.c | 2 +- drivers/md/raid5-ppl.c | 2 +- drivers/md/raid5.c | 2 +- drivers/target/target_core_iblock.c | 2 +- fs/block_dev.c | 2 +- fs/btrfs/extent_io.c | 3 +- fs/xfs/xfs_super.c | 3 +- include/linux/bio.h | 6 ++-- 18 files changed, 45 insertions(+), 57 deletions(-) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 0e36ca5407b5..84b313bd3ce8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1921,9 +1921,26 @@ void bioset_free(struct bio_set *bs) } EXPORT_SYMBOL(bioset_free); -static struct bio_set *__bioset_create(unsigned int pool_size, - unsigned int front_pad, - bool create_bvec_pool) +/** + * bioset_create - Create a bio_set + * @pool_size: Number of bio and bio_vecs to cache in the mempool + * @front_pad: Number of bytes to allocate in front of the returned bio + * @flags: Flags to modify behavior, currently only %BIOSET_NEED_BVECS + * + * Description: + * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller + * to ask for a number of bytes to be allocated in front of the bio. + * Front pad allocation is useful for embedding the bio inside + * another structure, to avoid allocating extra data to go with the bio. + * Note that the bio must be embedded at the END of that structure always, + * or things will break badly.
- */ -struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) -{ - return __bioset_create(pool_size, front_pad, true); -} EXPORT_SYMBOL(bioset_create); -/** - * bioset_create_nobvec - Create a bio_set without bio_vec mempool - * @pool_size: Number of bio to cache in the mempool - * @front_pad: Number of bytes to allocate in front of the returned bio - * - * Description: - * Same functionality as bioset_create() except that mempool is not - * created for bio_vecs. Saving some memory for bio_clone_fast() users. - */ -struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad) -{ - return __bioset_create(pool_size, front_pad, false); -} -EXPORT_SYMBOL(bioset_create_nobvec); - #ifdef CONFIG_BLK_CGROUP /** @@ -2112,7 +2096,7 @@ static int __init init_bio(void) bio_integrity_init(); biovec_init_slabs(); - fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); + fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (!fs_bio_set) panic("bio: can't allocate bios\n"); diff --git a/block/blk-core.c b/block/blk-core.c index 31b5ece6b18e..62cf92550512 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -790,7 +790,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (q->id < 0) goto fail_q; - q->bio_split = bioset_create(BIO_POOL_SIZE, 0); + q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (!q->bio_split) goto fail_id; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 84455c365f57..b395fe391171 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2165,7 +2165,7 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ - drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, BIOSET_NEED_BVECS); if (drbd_md_io_bio_set == NULL) goto Enomem; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fbc4f5412dec..abd6e825b39b 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -782,7 +782,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, minor *= BCACHE_MINORS; - if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || + if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) || !(d->disk = alloc_disk(BCACHE_MINORS))) { ida_simple_remove(&bcache_minor, minor); return -ENOMEM; @@ -1516,7 +1516,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) sizeof(struct bbio) + sizeof(struct bio_vec) * bucket_pages(c))) || !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || - !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || + !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0)) || diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 586cef085c6a..237ff8e9752a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2677,7 +2677,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - cc->bs = bioset_create(MIN_IOS, 0); + cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS); if (!cc->bs) { ti->error = "Cannot allocate crypt bioset"; goto bad; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index c8f8f3004085..5c4121024d92 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -58,7 +58,7 @@ struct dm_io_client 
*dm_io_client_create(void) if (!client->pool) goto bad; - client->bios = bioset_create(min_ios, 0); + client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS); if (!client->bios) goto bad; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c4b74f7398ac..3394a311de3d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2660,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu BUG(); } - pools->bs = bioset_create_nobvec(pool_size, front_pad); + pools->bs = bioset_create(pool_size, front_pad, 0); if (!pools->bs) goto out; diff --git a/drivers/md/md.c b/drivers/md/md.c index d43df1176c23..07fe780ccd29 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5428,7 +5428,7 @@ int md_run(struct mddev *mddev) } if (mddev->bio_set == NULL) { - mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0); + mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (!mddev->bio_set) return -ENOMEM; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a1a3cf0293df..98ca2c1d3226 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (!conf->r1bio_pool) goto abort; - conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); + conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); if (!conf->bio_split) goto abort; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3178273a7253..57a250fdbbcc 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) if (!conf->r10bio_pool) goto out; - conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); + conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); if (!conf->bio_split) goto out; diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 3746c9c27e54..bfa1e907c472 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) if (!log->io_pool) goto io_pool; - log->bs = bioset_create(R5L_POOL_SIZE, 0); + log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (!log->bs) goto io_bs; diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index e709ada0bf09..77cce3573aa8 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf) goto err; } - ppl_conf->bs = bioset_create(conf->raid_disks, 0); + ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0); if (!ppl_conf->bs) { ret = -ENOMEM; goto err; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7171bfd48223..62c965be97e1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6943,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; } - conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); + conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); if (!conf->bio_split) goto abort; conf->mddev = mddev; diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 75373624604b..c05d38016556 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev) return -EINVAL; } - ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0); + ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (!ib_dev->ibd_bio_set) { pr_err("IBLOCK: Unable to create bioset\n"); goto out; diff --git a/fs/block_dev.c b/fs/block_dev.c index 
bcd8e16a34e1..dd91c99e9ba0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -439,7 +439,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static __init int blkdev_init(void) { - blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio)); + blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS); if (!blkdev_dio_pool) return -ENOMEM; return 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8f66e55e7ba1..19eedf2e630b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -174,7 +174,8 @@ int __init extent_io_init(void) goto free_state_cache; btrfs_bioset = bioset_create(BIO_POOL_SIZE, - offsetof(struct btrfs_io_bio, bio)); + offsetof(struct btrfs_io_bio, bio), + BIOSET_NEED_BVECS); if (!btrfs_bioset) goto free_buffer_cache; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 455a575f101d..97df4db13b2e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1766,7 +1766,8 @@ STATIC int __init xfs_init_zones(void) { xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, - offsetof(struct xfs_ioend, io_inline_bio)); + offsetof(struct xfs_ioend, io_inline_bio), + BIOSET_NEED_BVECS); if (!xfs_ioend_bioset) goto out; diff --git a/include/linux/bio.h b/include/linux/bio.h index 9455aada1399..985dc645637e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -373,8 +373,10 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, return bio_split(bio, sectors, gfp, bs); } -extern struct bio_set *bioset_create(unsigned int, unsigned int); -extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); +extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags); +enum { + BIOSET_NEED_BVECS = BIT(0), +}; extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); -- cgit v1.2.3 From 47e0fb461fca1a68a566c82fcc006cc787312d8c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:57 +1000 Subject: blk: make the bioset rescue_workqueue optional. This patch converts bioset_create() to not create a workqueue by default, so allocations will never trigger punt_bios_to_rescuer(). It also introduces a new flag BIOSET_NEED_RESCUER which tells bioset_create() to preserve the old behavior. All callers of bioset_create() that are inside block device drivers are given the BIOSET_NEED_RESCUER flag. biosets used by filesystems or other top-level users do not need rescuing as the bio can never be queued behind other bios. This includes fs_bio_set, blkdev_dio_pool, btrfs_bioset, xfs_ioend_bioset, and one allocated by target_core_iblock.c. biosets used by md/raid do not need rescuing as their usage was recently audited and revised to never risk deadlock. It is hoped that most, if not all, of the remaining biosets can end up being the non-rescued version.
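To make the new calling convention concrete, a minimal sketch of the two cases the message describes (hypothetical "my_" names; not part of the patch):

    #include <linux/bio.h>

    static int __init my_biosets_init(void)
    {
            struct bio_set *driver_bs, *toplevel_bs;

            /* Inside a stacked block driver: bios allocated here can sit
             * behind other bios on current->bio_list, so keep the rescuer. */
            driver_bs = bioset_create(BIO_POOL_SIZE, 0,
                                      BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);

            /* A filesystem or other top-level user: its bios are never
             * queued behind others, so no rescue workqueue is needed. */
            toplevel_bs = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);

            if (!driver_bs || !toplevel_bs) {
                    if (driver_bs)
                            bioset_free(driver_bs);
                    if (toplevel_bs)
                            bioset_free(toplevel_bs);
                    return -ENOMEM;
            }
            return 0;
    }

The flag is deliberately opt-in, so each remaining BIOSET_NEED_RESCUER site stands out as a candidate for the auditing the message hopes for.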
Reviewed-by: Christoph Hellwig Credit-to: Ming Lei (minor fixes) Reviewed-by: Ming Lei Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/bio.c | 13 +++++++++++-- block/blk-core.c | 3 ++- drivers/block/drbd/drbd_main.c | 4 +++- drivers/md/bcache/super.c | 8 ++++++-- drivers/md/dm-crypt.c | 3 ++- drivers/md/dm-io.c | 3 ++- drivers/md/dm.c | 5 +++-- include/linux/bio.h | 1 + 8 files changed, 30 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 84b313bd3ce8..2bd064906e06 100644 --- a/block/bio.c +++ b/block/bio.c @@ -363,6 +363,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs) struct bio_list punt, nopunt; struct bio *bio; + if (WARN_ON_ONCE(!bs->rescue_workqueue)) + return; /* * In order to guarantee forward progress we must punt only bios that * were allocated from this bio_set; otherwise, if there was a bio on @@ -474,7 +476,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, if (current->bio_list && (!bio_list_empty(&current->bio_list[0]) || - !bio_list_empty(&current->bio_list[1]))) + !bio_list_empty(&current->bio_list[1])) && + bs->rescue_workqueue) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); @@ -1925,7 +1928,8 @@ EXPORT_SYMBOL(bioset_free); * bioset_create - Create a bio_set * @pool_size: Number of bio and bio_vecs to cache in the mempool * @front_pad: Number of bytes to allocate in front of the returned bio - * @flags: Flags to modify behavior, currently only %BIOSET_NEED_BVECS + * @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS + * and %BIOSET_NEED_RESCUER * * Description: * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller @@ -1936,6 +1940,8 @@ EXPORT_SYMBOL(bioset_free); * or things will break badly. * If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated * for allocating iovecs. This pool is not needed e.g. for bio_clone_fast(). + * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to + * dispatch queued requests when the mempool runs out of space.
* */ struct bio_set *bioset_create(unsigned int pool_size, @@ -1971,6 +1977,9 @@ struct bio_set *bioset_create(unsigned int pool_size, goto bad; } + if (!(flags & BIOSET_NEED_RESCUER)) + return bs; + bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); if (!bs->rescue_workqueue) goto bad; diff --git a/block/blk-core.c b/block/blk-core.c index 62cf92550512..9bd10c46a538 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -790,7 +790,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (q->id < 0) goto fail_q; - q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + q->bio_split = bioset_create(BIO_POOL_SIZE, 0, (BIOSET_NEED_BVECS | + BIOSET_NEED_RESCUER)); if (!q->bio_split) goto fail_id; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b395fe391171..bdf51b6977cf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2165,7 +2165,9 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ - drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, BIOSET_NEED_BVECS); + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, + BIOSET_NEED_BVECS | + BIOSET_NEED_RESCUER); if (drbd_md_io_bio_set == NULL) goto Enomem; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index abd6e825b39b..8352fad765f6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, minor *= BCACHE_MINORS; - if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) || + if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio), + BIOSET_NEED_BVECS | + BIOSET_NEED_RESCUER)) || !(d->disk = alloc_disk(BCACHE_MINORS))) { ida_simple_remove(&bcache_minor, minor); return -ENOMEM; @@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) sizeof(struct bbio) + sizeof(struct bio_vec) * bucket_pages(c))) || !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || - !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio), BIOSET_NEED_BVECS)) || + !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio), + BIOSET_NEED_BVECS | + BIOSET_NEED_RESCUER)) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0)) || diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 237ff8e9752a..9e1b72e8f7ef 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS); + cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS | + BIOSET_NEED_RESCUER)); if (!cc->bs) { ti->error = "Cannot allocate crypt bioset"; goto bad; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 5c4121024d92..81248a8a8b57 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void) if (!client->pool) goto bad; - client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS); + client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS | + BIOSET_NEED_RESCUER)); if (!client->bios) goto bad; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3394a311de3d..fbd06b9f9467 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) while ((bio = 
bio_list_pop(&list))) { struct bio_set *bs = bio->bi_pool; - if (unlikely(!bs) || bs == fs_bio_set) { + if (unlikely(!bs) || bs == fs_bio_set || + !bs->rescue_workqueue) { bio_list_add(&current->bio_list[i], bio); continue; } @@ -2660,7 +2661,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu BUG(); } - pools->bs = bioset_create(pool_size, front_pad, 0); + pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER); if (!pools->bs) goto out; diff --git a/include/linux/bio.h b/include/linux/bio.h index 985dc645637e..32c786baa10a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -376,6 +376,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags); enum { BIOSET_NEED_BVECS = BIT(0), + BIOSET_NEED_RESCUER = BIT(1), }; extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); -- cgit v1.2.3 From 93b27e72904a9869e648c870bf0d04b124fda1c7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:57 +1000 Subject: blk: use non-rescuing bioset for q->bio_split. A rescuing bioset is only useful if there might be bios from that same bioset on the bio_list_on_stack queue at a time when bio_alloc_bioset() is called. This never applies to q->bio_split. Allocations from q->bio_split are only ever made from blk_queue_split() which is only ever called early in each of various make_request_fn()s. The original bio (call this A) is then passed to generic_make_request() and is placed on the bio_list_on_stack queue, and the bio that was allocated from q->bio_split (B) is processed. The processing of this may cause other bios to be passed to generic_make_request() or may even cause the bio B itself to be passed, possibly after some prefix has been split off (using some other bioset). generic_make_request() now guarantees that all of these bios (B and dependants) will be fully processed before the tail of the original bio A gets handled. None of these early bios can possibly trigger an allocation from the original q->bio_split as they are either too small to require splitting or (more likely) are destined for a different queue. The next time that the original q->bio_split might be used by this thread is when A is processed again, as it might still be too big to handle directly. By this time there cannot be any other bios allocated from q->bio_split in the generic_make_request() queue. So no rescuing will ever be needed. Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 9bd10c46a538..62cf92550512 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -790,8 +790,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (q->id < 0) goto fail_q; - q->bio_split = bioset_create(BIO_POOL_SIZE, 0, (BIOSET_NEED_BVECS | - BIOSET_NEED_RESCUER)); + q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (!q->bio_split) goto fail_id; -- cgit v1.2.3 From a8821f3f32bea173c7f9546b3b025898f0e09f58 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:58 +1000 Subject: block: Improvements to bounce-buffer handling Since commit 23688bf4f830 ("block: ensure to split after potentially bouncing a bio") blk_queue_bounce() is called *before* blk_queue_split().
This means that: 1/ the comments in blk_queue_split() about bounce buffers are irrelevant, and 2/ a very large bio (more than BIO_MAX_PAGES) will no longer be split before it arrives at blk_queue_bounce(), leading to the possibility that bio_clone_bioset() will fail and a NULL will be dereferenced. Separately, blk_queue_bounce() shouldn't use fs_bio_set as the bio being copied could be from the same set, and this could lead to a deadlock. So: - allocate 2 private biosets for blk_queue_bounce, one for splitting enormous bios and one for cloning bios. - add code to split a bio that exceeds BIO_MAX_PAGES. - Fix up the comments in blk_queue_split() Credit-to: Ming Lei (suggested using single bio_for_each_segment loop) Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-merge.c | 14 ++++---------- block/bounce.c | 32 ++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index d59074556703..51c84540d3bb 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -117,17 +117,11 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, * each holds at most BIO_MAX_PAGES bvecs because * bio_clone() can fail to allocate big bvecs. * - * It should have been better to apply the limit per - * request queue in which bio_clone() is involved, - * instead of globally. The biggest blocker is the - * bio_clone() in bio bounce. + * Those drivers which will need to use bio_clone() + * should tell us in some way. For now, impose the + * BIO_MAX_PAGES limit on all queues. * - * If bio is splitted by this reason, we should have - * allowed to continue bios merging, but don't do - * that now for making the change simple. - * - * TODO: deal with bio bounce's bio_clone() gracefully - * and convert the global limit into per-queue limit. + * TODO: handle users of bio_clone() differently.
*/ if (bvecs++ >= BIO_MAX_PAGES) goto split; diff --git a/block/bounce.c b/block/bounce.c index e4703181d97f..17d77613c471 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -26,6 +26,7 @@ #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 +struct bio_set *bounce_bio_set, *bounce_bio_split; static mempool_t *page_pool, *isa_page_pool; #if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL) @@ -40,6 +41,14 @@ static __init int init_emergency_pool(void) BUG_ON(!page_pool); pr_info("pool size: %d pages\n", POOL_SIZE); + bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + BUG_ON(!bounce_bio_set); + if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE)) + BUG_ON(1); + + bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0); + BUG_ON(!bounce_bio_split); + return 0; } @@ -186,15 +195,26 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, int rw = bio_data_dir(*bio_orig); struct bio_vec *to, from; struct bvec_iter iter; - unsigned i; + unsigned i = 0; + bool bounce = false; + int sectors = 0; - bio_for_each_segment(from, *bio_orig, iter) + bio_for_each_segment(from, *bio_orig, iter) { + if (i++ < BIO_MAX_PAGES) + sectors += from.bv_len >> 9; if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q)) - goto bounce; + bounce = true; + } + if (!bounce) + return; - return; -bounce: - bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set); + if (sectors < bio_sectors(*bio_orig)) { + bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); + bio_chain(bio, *bio_orig); + generic_make_request(*bio_orig); + *bio_orig = bio; + } + bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set); bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; -- cgit v1.2.3 From 9b10f6a9c2aaab49c56b8cff0facdc1b64ed7e1c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:59 +1000 Subject: block: remove bio_clone() and all references. bio_clone() is no longer used. Only bio_clone_bioset() or bio_clone_fast(). This is for the best, as bio_clone() used fs_bio_set, and filesystems are unlikely to want to use bio_clone(). So remove bio_clone() and all references. This includes a fix to some incorrect documentation. Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 2 +- block/bio.c | 2 +- block/blk-merge.c | 6 +++--- drivers/md/md.c | 2 +- include/linux/bio.h | 5 ----- 5 files changed, 6 insertions(+), 11 deletions(-) (limited to 'block') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 01ddeaf64b0f..9490f2845f06 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the i/o is issued (since the bio may otherwise get freed in case i/o completion happens in the meantime). -The bio_clone() routine may be used to duplicate a bio, where the clone +The bio_clone_fast() routine may be used to duplicate a bio, where the clone shares the bio_vec_list with the original bio (i.e. both point to the same bio_vec_list). This would typically be used for splitting i/o requests in lvm or md. diff --git a/block/bio.c b/block/bio.c index 2bd064906e06..89a51bd49ab7 100644 --- a/block/bio.c +++ b/block/bio.c @@ -547,7 +547,7 @@ EXPORT_SYMBOL(zero_fill_bio); * * Description: * Put a reference to a &struct bio, either one you have gotten with - * bio_alloc, bio_get or bio_clone. The last put of a bio will free it. 
+ * bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it. **/ void bio_put(struct bio *bio) { diff --git a/block/blk-merge.c b/block/blk-merge.c index 51c84540d3bb..e7862e9dcc39 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -115,13 +115,13 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, * With arbitrary bio size, the incoming bio may be very * big. We have to split the bio into small bios so that * each holds at most BIO_MAX_PAGES bvecs because - * bio_clone() can fail to allocate big bvecs. + * bio_clone_bioset() can fail to allocate big bvecs. * - * Those drivers which will need to use bio_clone() + * Those drivers which will need to use bio_clone_bioset() * should tell us in some way. For now, impose the * BIO_MAX_PAGES limit on all queues. * - * TODO: handle users of bio_clone() differently. + * TODO: handle users of bio_clone_bioset() differently. */ if (bvecs++ >= BIO_MAX_PAGES) goto split; diff --git a/drivers/md/md.c b/drivers/md/md.c index 07fe780ccd29..31bcbfb09fef 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -185,7 +185,7 @@ static int start_readonly; static bool create_on_open = true; /* bio_clone_mddev - * like bio_clone, but with a local bio set + * like bio_clone_bioset, but with a local bio set */ struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, diff --git a/include/linux/bio.h b/include/linux/bio.h index 32c786baa10a..40d054185277 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -395,11 +395,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); } -static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) -{ - return bio_clone_bioset(bio, gfp_mask, fs_bio_set); -} - static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); -- cgit v1.2.3 From 58c94cc19e0800e8c0d0d708e37eeb4dee564ba9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:59 +1000 Subject: block: don't check for BIO_MAX_PAGES in blk_bio_segment_split() blk_bio_segment_split() makes sure bios have no more than BIO_MAX_PAGES entries in the bi_io_vec. This was done because bio_clone_bioset() (when given a mempool bioset) could not handle larger io_vecs. No driver uses bio_clone_bioset() any more; they all use bio_clone_fast() if anything, and bio_clone_fast() doesn't clone the bi_io_vec. The main user of bio_clone_bioset() at this level is bounce.c, and bouncing now happens before blk_bio_segment_split(), so that is not of concern. So remove the big helpful comment and the code. Reviewed-by: Christoph Hellwig Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-merge.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index e7862e9dcc39..cea544ec5d96 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -108,24 +108,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, bool do_split = true; struct bio *new = NULL; const unsigned max_sectors = get_max_io_size(q, bio); - unsigned bvecs = 0; bio_for_each_segment(bv, bio, iter) { - /* - * With arbitrary bio size, the incoming bio may be very - * big. We have to split the bio into small bios so that - * each holds at most BIO_MAX_PAGES bvecs because - * bio_clone_bioset() can fail to allocate big bvecs.
- * - * Those drivers which will need to use bio_clone_bioset() - * should tell us in some way. For now, impose the - * BIO_MAX_PAGES limit on all queues. - * - * TODO: handle users of bio_clone_bioset() differently. - */ - if (bvecs++ >= BIO_MAX_PAGES) - goto split; - /* * If the queue doesn't support SG gaps and adding this * offset would create a gap, disallow it. -- cgit v1.2.3 From e4e739131ac93d373cd2d2fd92820a6a39115ba5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:03 +0800 Subject: blk-mq: introduce blk_mq_unquiesce_queue blk_mq_start_stopped_hw_queues() is used implicitly as the counterpart of blk_mq_quiesce_queue() for unquiescing a queue, so introduce blk_mq_unquiesce_queue() and make it the explicit counterpart of blk_mq_quiesce_queue(). This function is for improving the current quiescing mechanism in the following patches. Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++++++ include/linux/blk-mq.h | 1 + 2 files changed, 14 insertions(+) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index cc85de9d6b2d..07785b5cf2bc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -181,6 +181,19 @@ void blk_mq_quiesce_queue(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); +/* + * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue() + * @q: request queue. + * + * This function recovers queue into the state before quiescing + * which is done by blk_mq_quiesce_queue. + */ +void blk_mq_unquiesce_queue(struct request_queue *q) +{ + blk_mq_start_stopped_hw_queues(q, true); +} +EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue); + void blk_mq_wake_waiters(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 78a8b64074ea..787d8a2a2ac6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -245,6 +245,7 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); +void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); -- cgit v1.2.3 From f4560ffe8cec1361b1021d81aca6a4173f8e7c87 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 18 Jun 2017 14:24:27 -0600 Subject: blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue It is required that no dispatch can happen any more once blk_mq_quiesce_queue() returns, but we have no such requirement on the APIs that stop a queue. Also, blk_mq_quiesce_queue() still may not block/drain dispatch in the case of BLK_MQ_S_START_ON_RUN, so use the newly introduced flag QUEUE_FLAG_QUIESCED and evaluate it inside RCU read-side critical sections to fix this issue. Furthermore, blk_mq_quiesce_queue() is implemented via stopping the queue, which limits its uses and easily causes races, because any queue restart in other paths may break blk_mq_quiesce_queue(). With the introduced QUEUE_FLAG_QUIESCED flag, we no longer need to depend on stopping the queue for quiescing.
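For driver writers, the resulting pairing is simple. A hedged sketch of a typical caller (hypothetical context, not from the patch):

    #include <linux/blk-mq.h>

    /* Sketch: pause dispatch completely around a reconfiguration step.
     * blk_mq_quiesce_queue() sets QUEUE_FLAG_QUIESCED and waits for the
     * RCU/SRCU readers in the dispatch path, so no .queue_rq() can still
     * be running once it returns. */
    static void my_reconfigure(struct request_queue *q)
    {
            blk_mq_quiesce_queue(q);
            /* ... device state can be changed safely here ... */
            blk_mq_unquiesce_queue(q);      /* dispatch resumes */
    }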
Signed-off-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 3 ++- block/blk-mq.c | 11 ++++++++++- include/linux/blk-mq.h | 4 ++++ include/linux/blkdev.h | 2 ++ 4 files changed, 18 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 254d1c164567..9f025289da63 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -58,7 +58,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) bool did_work = false; LIST_HEAD(rq_list); - if (unlikely(blk_mq_hctx_stopped(hctx))) + /* RCU or SRCU read lock is needed before checking quiesced flag */ + if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) return; hctx->run++; diff --git a/block/blk-mq.c b/block/blk-mq.c index 07785b5cf2bc..40b22c7f684e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -170,6 +170,10 @@ void blk_mq_quiesce_queue(struct request_queue *q) __blk_mq_stop_hw_queues(q, true); + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_QUIESCED, q); + spin_unlock_irq(q->queue_lock); + queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) synchronize_srcu(&hctx->queue_rq_srcu); @@ -190,6 +194,10 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); */ void blk_mq_unquiesce_queue(struct request_queue *q) { + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_QUIESCED, q); + spin_unlock_irq(q->queue_lock); + blk_mq_start_stopped_hw_queues(q, true); } EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue); @@ -1444,7 +1452,8 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, blk_status_t ret; bool run_queue = true; - if (blk_mq_hctx_stopped(hctx)) { + /* RCU or SRCU read lock is needed before checking quiesced flag */ + if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { run_queue = false; goto insert; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 787d8a2a2ac6..de6536c14ae7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -268,6 +268,10 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); */ static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q) { + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_QUIESCED, q); + spin_unlock_irq(q->queue_lock); + blk_mq_stop_hw_queues(q); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8423f6baf818..22cfba64ce81 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -619,6 +619,7 @@ struct request_queue { #define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */ +#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -715,6 +716,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) +#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) static inline bool blk_account_rq(struct request *rq) { -- cgit v1.2.3 From 69e07c4adb8669fd77f3b59abdb436aca9f1bee9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:07 +0800 Subject: blk-mq: update comments on blk_mq_quiesce_queue() Actually what we want to get from blk_mq_quiesce_queue() isn't only to wait for completion 
of all ongoing .queue_rq(). In the typical context of canceling requests, we need to make sure that the following is done in the dispatch path before starting to cancel requests: - failed dispatched request is finished - busy dispatched request is requeued, and the STARTED flag is cleared So update the comment to keep the code, documentation and our expectation consistent. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 40b22c7f684e..f2a73190f60d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -155,12 +155,13 @@ void blk_mq_unfreeze_queue(struct request_queue *q) EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); /** - * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished + * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished * @q: request queue. * * Note: this function does not prevent that the struct request end_io() - * callback function is invoked. Additionally, it is not prevented that - * new queue_rq() calls occur unless the queue has been stopped first. + * callback function is invoked. Once this function is returned, we make + * sure no dispatch can happen until the queue is unquiesced via + * blk_mq_unquiesce_queue(). */ void blk_mq_quiesce_queue(struct request_queue *q) { -- cgit v1.2.3 From 1d9e9bc6b56e1bb7e33e7e2e1b99d7088356c006 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:08 +0800 Subject: blk-mq: don't stop queue for quiescing A queue can be started by other blk-mq APIs and used in different cases. This limits the uses of blk_mq_quiesce_queue() when it is based on stopping the queue, and makes its usage very difficult: users have to use the stop-queue APIs carefully to avoid breaking blk_mq_quiesce_queue(). We have applied the QUIESCED flag for draining and blocking dispatch, so it isn't necessary to stop the queue any more. After stopping the queue is removed, blk_mq_quiesce_queue() can be used safely and easily, and users won't worry about queue restarting during quiescing at all.
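The practical consequence can be shown in a short sketch (hypothetical context; illustrative only): once QUIESCED is a flag of its own, a queue restart from some unrelated path no longer breaks an ongoing quiesce, because the dispatch functions re-check the flag under RCU/SRCU:

    #include <linux/blk-mq.h>

    static void my_show_quiesce_is_robust(struct request_queue *q)
    {
            blk_mq_quiesce_queue(q);

            /* Harmless now: dispatch still sees QUEUE_FLAG_QUIESCED and
             * returns early, so no .queue_rq() can run here. */
            blk_mq_start_stopped_hw_queues(q, true);

            blk_mq_unquiesce_queue(q);
    }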
Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 +++------ include/linux/blk-mq.h | 2 -- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index f2a73190f60d..dbae586602f6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -169,11 +169,7 @@ void blk_mq_quiesce_queue(struct request_queue *q) unsigned int i; bool rcu = false; - __blk_mq_stop_hw_queues(q, true); - - spin_lock_irq(q->queue_lock); - queue_flag_set(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irq(q->queue_lock); + blk_mq_quiesce_queue_nowait(q); queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) @@ -199,7 +195,8 @@ void blk_mq_unquiesce_queue(struct request_queue *q) queue_flag_clear(QUEUE_FLAG_QUIESCED, q); spin_unlock_irq(q->queue_lock); - blk_mq_start_stopped_hw_queues(q, true); + /* dispatch requests which are inserted during quiescing */ + blk_mq_run_hw_queues(q, true); } EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index de6536c14ae7..f1bd13ae8f57 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -271,8 +271,6 @@ static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q) spin_lock_irq(q->queue_lock); queue_flag_set(QUEUE_FLAG_QUIESCED, q); spin_unlock_irq(q->queue_lock); - - blk_mq_stop_hw_queues(q); } /* -- cgit v1.2.3 From 39a70c76b89b81db91a72a86b6c6a9b239013417 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:09 +0800 Subject: blk-mq: clarify dispatch may not be drained/blocked by stopping queue BLK_MQ_S_STOPPED may not be observed in other concurrent I/O paths, so we can't guarantee that dispatching won't happen after returning from the queue-stopping APIs. Clarify this fact to avoid potential misuse. Signed-off-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index dbae586602f6..89cbd022b1eb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1204,6 +1204,15 @@ static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) set_bit(BLK_MQ_S_STOPPED, &hctx->state); } +/* + * This function is often used for pausing .queue_rq() by driver when + * there isn't enough resource or some conditions aren't satisfied, and + * BLK_MQ_RQ_QUEUE_BUSY is usually returned. + * + * We do not guarantee that dispatch can be drained or blocked + * after blk_mq_stop_hw_queue() returns. Please use + * blk_mq_quiesce_queue() for that requirement. + */ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) { __blk_mq_stop_hw_queue(hctx, false); @@ -1219,6 +1228,15 @@ static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) __blk_mq_stop_hw_queue(hctx, sync); } +/* + * This function is often used for pausing .queue_rq() by driver when + * there isn't enough resource or some conditions aren't satisfied, and + * BLK_MQ_RQ_QUEUE_BUSY is usually returned. + * + * We do not guarantee that dispatch can be drained or blocked + * after blk_mq_stop_hw_queues() returns. Please use + * blk_mq_quiesce_queue() for that requirement.
+ */ void blk_mq_stop_hw_queues(struct request_queue *q) { __blk_mq_stop_hw_queues(q, false); } EXPORT_SYMBOL(blk_mq_stop_hw_queues); -- cgit v1.2.3 From 641a9ed60f3620936921a58fb21d9f3aa891f3a4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:10 +0800 Subject: Revert "blk-mq: don't use sync workqueue flushing from drivers" This patch reverts commit 2719aa217e0d02 ("blk-mq: don't use sync workqueue flushing from drivers") because only blk_mq_quiesce_queue() needs the sync flush, and now we don't need to stop the queue any more, so revert it. Also switch blk_mq_stop_hw_queue() to cancel_delayed_work(). Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 89cbd022b1eb..dd276a9e138e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); -static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync); static int blk_mq_poll_stats_bkt(const struct request *rq) { @@ -1194,16 +1193,6 @@ bool blk_mq_queue_stopped(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_queue_stopped); -static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) -{ - if (sync) - cancel_delayed_work_sync(&hctx->run_work); - else - cancel_delayed_work(&hctx->run_work); - - set_bit(BLK_MQ_S_STOPPED, &hctx->state); -} - /* * This function is often used for pausing .queue_rq() by driver when * there isn't enough resource or some conditions aren't satisfied, and @@ -1215,18 +1204,11 @@ static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) */ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) { - __blk_mq_stop_hw_queue(hctx, false); -} -EXPORT_SYMBOL(blk_mq_stop_hw_queue); + cancel_delayed_work(&hctx->run_work); -static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) -{ - struct blk_mq_hw_ctx *hctx; - int i; - - queue_for_each_hw_ctx(q, hctx, i) - __blk_mq_stop_hw_queue(hctx, sync); + set_bit(BLK_MQ_S_STOPPED, &hctx->state); } +EXPORT_SYMBOL(blk_mq_stop_hw_queue); /* * This function is often used for pausing .queue_rq() by driver when @@ -1239,7 +1221,11 @@ static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) */ void blk_mq_stop_hw_queues(struct request_queue *q) { - __blk_mq_stop_hw_queues(q, false); + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_stop_hw_queue(hctx); } EXPORT_SYMBOL(blk_mq_stop_hw_queues); -- cgit v1.2.3 From 03a07c92a9ed9938d828ca7f1d11b8bc63a7bb89 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 20 Jun 2017 07:05:46 -0500 Subject: block: return on congested block device A new bio operation flag REQ_NOWAIT is introduced to identify bios originating from an iocb with IOCB_NOWAIT. This flag indicates to return immediately if a request cannot be made instead of retrying. Stacked devices such as md (the ones with make_request_fn hooks) currently are not supported because they may block for housekeeping. For example, an md can have a part of the device suspended. For this reason, only request-based devices are supported. In the future, this feature will be expanded to stacked devices by teaching them how to handle the REQ_NOWAIT flag.
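A sketch of how an issuer opts in and how the failure surfaces (hypothetical "my_" names; the real plumbing for direct I/O is in the diff below):

    #include <linux/bio.h>
    #include <linux/blk_types.h>

    static void my_endio(struct bio *bio)
    {
            /* BLK_STS_AGAIN is the new "would block" status: the request
             * was rejected instead of the submitter sleeping. */
            if (bio->bi_status == BLK_STS_AGAIN)
                    pr_debug("bio would block, caller sees -EAGAIN\n");
            bio_put(bio);
    }

    static void my_submit_nowait(struct bio *bio)
    {
            bio->bi_opf |= REQ_NOWAIT;      /* fail fast, don't sleep */
            bio->bi_end_io = my_endio;
            submit_bio(bio);
    }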
Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jens Axboe --- block/blk-core.c | 22 ++++++++++++++++++++-- block/blk-mq.c | 4 ++++ fs/direct-io.c | 10 ++++++++-- include/linux/bio.h | 6 ++++++ include/linux/blk_types.h | 4 ++++ 5 files changed, 42 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 62cf92550512..279e3c432d7b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -143,6 +143,7 @@ static const struct { [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, + [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" }, /* device mapper special case, should not leak out: */ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, @@ -1314,6 +1315,11 @@ retry: if (!IS_ERR(rq)) return rq; + if (op & REQ_NOWAIT) { + blk_put_rl(rl); + return ERR_PTR(-EAGAIN); + } + if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) { blk_put_rl(rl); return rq; @@ -1961,6 +1967,14 @@ generic_make_request_checks(struct bio *bio) goto end_io; } + /* + * For a REQ_NOWAIT based request, return -EOPNOTSUPP + * if queue is not a request based queue. + */ + + if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) + goto not_supported; + part = bio->bi_bdev->bd_part; if (should_fail_request(part, bio->bi_iter.bi_size) || should_fail_request(&part_to_disk(part)->part0, @@ -2118,7 +2132,7 @@ blk_qc_t generic_make_request(struct bio *bio) do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - if (likely(blk_queue_enter(q, false) == 0)) { + if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) { struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ @@ -2143,7 +2157,11 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_merge(&bio_list_on_stack[0], &same); bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } else { - bio_io_error(bio); + if (unlikely(!blk_queue_dying(q) && + (bio->bi_opf & REQ_NOWAIT))) + bio_wouldblock_error(bio); + else + bio_io_error(bio); } bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index dd276a9e138e..ca03cd4b263f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -293,6 +293,8 @@ static struct request *blk_mq_get_request(struct request_queue *q, data->ctx = blk_mq_get_ctx(q); if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); + if (op & REQ_NOWAIT) + data->flags |= BLK_MQ_REQ_NOWAIT; if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; @@ -1544,6 +1546,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) rq = blk_mq_get_request(q, bio, bio->bi_opf, &data); if (unlikely(!rq)) { __wbt_done(q->rq_wb, wb_acct); + if (bio->bi_opf & REQ_NOWAIT) + bio_wouldblock_error(bio); return BLK_QC_T_NONE; } diff --git a/fs/direct-io.c b/fs/direct-io.c index e8baaabebf13..c87077d1dc33 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -479,8 +479,12 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) unsigned i; blk_status_t err = bio->bi_status; - if (err) - dio->io_error = -EIO; + if (err) { + if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) + dio->io_error = -EAGAIN; + else + dio->io_error = -EIO; + } if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { bio_check_pages_dirty(bio); /* transfers ownership */ @@ -1194,6 +1198,8 @@ do_blockdev_direct_IO(struct kiocb 
*iocb, struct inode *inode, if (iov_iter_rw(iter) == WRITE) { dio->op = REQ_OP_WRITE; dio->op_flags = REQ_SYNC | REQ_IDLE; + if (iocb->ki_flags & IOCB_NOWAIT) + dio->op_flags |= REQ_NOWAIT; } else { dio->op = REQ_OP_READ; } diff --git a/include/linux/bio.h b/include/linux/bio.h index 40d054185277..36aa641cde28 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -416,6 +416,12 @@ static inline void bio_io_error(struct bio *bio) bio_endio(bio); } +static inline void bio_wouldblock_error(struct bio *bio) +{ + bio->bi_status = BLK_STS_AGAIN; + bio_endio(bio); +} + struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dcd45b15a3a5..e210da6d14b8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -36,6 +36,8 @@ typedef u8 __bitwise blk_status_t; /* hack for device mapper, don't use elsewhere: */ #define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) +#define BLK_STS_AGAIN ((__force blk_status_t)12) + struct blk_issue_stat { u64 stat; }; @@ -224,6 +226,7 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_NOWAIT, /* Don't wait if request will block */ __REQ_NR_BITS, /* stops here */ }; @@ -242,6 +245,7 @@ enum req_flag_bits { #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit v1.2.3 From 073196787727e454e17a96d222ea55eba2000978 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:38 -0700 Subject: blk-mq: Reduce blk_mq_hw_ctx size Since the srcu structure is rather large (184 bytes on an x86-64 system with kernel debugging disabled), only allocate it if needed. 
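The allocation trick here is the classic conditional-tail pattern: keep the large optional member in a zero-length array that must remain the last field, and size the allocation with a helper. A standalone userspace model of the same pattern (names invented; zero-length arrays are the GNU C extension the kernel relies on):

#include <stdio.h>
#include <stdlib.h>

struct big_state { char payload[184]; };	/* stand-in for struct srcu_struct */

struct hw_ctx {
	unsigned long flags;
#define F_BLOCKING	(1UL << 0)
	/* must be the last member -- see hw_ctx_size() */
	struct big_state tail[0];
};

static size_t hw_ctx_size(unsigned long flags)
{
	size_t size = sizeof(struct hw_ctx);

	/* pay for the large member only when it will actually be used */
	if (flags & F_BLOCKING)
		size += sizeof(struct big_state);
	return size;
}

int main(void)
{
	struct hw_ctx *fast = calloc(1, hw_ctx_size(0));
	struct hw_ctx *blocking = calloc(1, hw_ctx_size(F_BLOCKING));

	if (!fast || !blocking)
		return 1;
	blocking->flags = F_BLOCKING;
	blocking->tail[0].payload[0] = 1;	/* tail storage exists only here */
	printf("%zu vs %zu bytes per context\n",
	       hw_ctx_size(0), hw_ctx_size(F_BLOCKING));
	free(fast);
	free(blocking);
	return 0;
}

The BUILD_BUG_ON() in blk_mq_hw_ctx_size() below serves as the compile-time assert for this layout: it checks that the array really sits at the aligned end of the structure.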
Reported-by: Ming Lei Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Cc: Hannes Reinecke Cc: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 30 ++++++++++++++++++++++-------- include/linux/blk-mq.h | 5 +++-- 2 files changed, 25 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index ca03cd4b263f..3e0cc11b1a90 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -172,7 +172,7 @@ void blk_mq_quiesce_queue(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) - synchronize_srcu(&hctx->queue_rq_srcu); + synchronize_srcu(hctx->queue_rq_srcu); else rcu = true; } @@ -1094,9 +1094,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) } else { might_sleep(); - srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); + srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); blk_mq_sched_dispatch_requests(hctx); - srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); + srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); } } @@ -1505,9 +1505,9 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, might_sleep(); - srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); + srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); __blk_mq_try_issue_directly(hctx, rq, cookie, true); - srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); + srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); } } @@ -1853,7 +1853,7 @@ static void blk_mq_exit_hctx(struct request_queue *q, set->ops->exit_hctx(hctx, hctx_idx); if (hctx->flags & BLK_MQ_F_BLOCKING) - cleanup_srcu_struct(&hctx->queue_rq_srcu); + cleanup_srcu_struct(hctx->queue_rq_srcu); blk_mq_remove_cpuhp(hctx); blk_free_flush_queue(hctx->fq); @@ -1926,7 +1926,7 @@ static int blk_mq_init_hctx(struct request_queue *q, goto free_fq; if (hctx->flags & BLK_MQ_F_BLOCKING) - init_srcu_struct(&hctx->queue_rq_srcu); + init_srcu_struct(hctx->queue_rq_srcu); blk_mq_debugfs_register_hctx(q, hctx); @@ -2201,6 +2201,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) } EXPORT_SYMBOL(blk_mq_init_queue); +static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) +{ + int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); + + BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu), + __alignof__(struct blk_mq_hw_ctx)) != + sizeof(struct blk_mq_hw_ctx)); + + if (tag_set->flags & BLK_MQ_F_BLOCKING) + hw_ctx_size += sizeof(struct srcu_struct); + + return hw_ctx_size; +} + static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct request_queue *q) { @@ -2215,7 +2229,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, continue; node = blk_mq_hw_queue_to_node(q->mq_map, i); - hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx), + hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), GFP_KERNEL, node); if (!hctxs[i]) break; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f1bd13ae8f57..3f2c22a42df6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -39,8 +39,6 @@ struct blk_mq_hw_ctx { struct blk_mq_tags *tags; struct blk_mq_tags *sched_tags; - struct srcu_struct queue_rq_srcu; - unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 @@ -62,6 +60,9 @@ struct blk_mq_hw_ctx { struct dentry *debugfs_dir; struct dentry *sched_debugfs_dir; #endif + + /* Must be the last member - see also blk_mq_hw_ctx_size(). 
*/ + struct srcu_struct queue_rq_srcu[0]; }; struct blk_mq_tag_set { -- cgit v1.2.3 From cd6ce1482fd9e691bb68c660fa918c90f6b1bc25 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:39 -0700 Subject: block: Make request operation type argument declarations consistent Instead of declaring the second argument of blk_*_get_request() as int and passing it to functions that expect an unsigned int, declare that second argument as unsigned int. Also, for consistency, rename that second argument from 'rw' to 'op'. This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++------ block/blk-mq.c | 10 +++++----- include/linux/blk-mq.h | 6 +++--- include/linux/blkdev.h | 3 ++- 4 files changed, 17 insertions(+), 15 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 279e3c432d7b..21f6f1020303 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1347,8 +1347,8 @@ retry: goto retry; } -static struct request *blk_old_get_request(struct request_queue *q, int rw, - gfp_t gfp_mask) +static struct request *blk_old_get_request(struct request_queue *q, + unsigned int op, gfp_t gfp_mask) { struct request *rq; @@ -1356,7 +1356,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, create_io_context(gfp_mask, q->node); spin_lock_irq(q->queue_lock); - rq = get_request(q, rw, NULL, gfp_mask); + rq = get_request(q, op, NULL, gfp_mask); if (IS_ERR(rq)) { spin_unlock_irq(q->queue_lock); return rq; @@ -1369,14 +1369,15 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, return rq; } -struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) +struct request *blk_get_request(struct request_queue *q, unsigned int op, + gfp_t gfp_mask) { if (q->mq_ops) - return blk_mq_alloc_request(q, rw, + return blk_mq_alloc_request(q, op, (gfp_mask & __GFP_DIRECT_RECLAIM) ?
0 : BLK_MQ_REQ_NOWAIT); else - return blk_old_get_request(q, rw, gfp_mask); + return blk_old_get_request(q, op, gfp_mask); } EXPORT_SYMBOL(blk_get_request); diff --git a/block/blk-mq.c b/block/blk-mq.c index 3e0cc11b1a90..2d21fbccc3a5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -328,7 +328,7 @@ static struct request *blk_mq_get_request(struct request_queue *q, return rq; } -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, +struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, unsigned int flags) { struct blk_mq_alloc_data alloc_data = { .flags = flags }; @@ -339,7 +339,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, if (ret) return ERR_PTR(ret); - rq = blk_mq_get_request(q, NULL, rw, &alloc_data); + rq = blk_mq_get_request(q, NULL, op, &alloc_data); blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); @@ -354,8 +354,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, } EXPORT_SYMBOL(blk_mq_alloc_request); -struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, - unsigned int flags, unsigned int hctx_idx) +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, + unsigned int op, unsigned int flags, unsigned int hctx_idx) { struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; @@ -390,7 +390,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, cpu = cpumask_first(alloc_data.hctx->cpumask); alloc_data.ctx = __blk_mq_get_ctx(q, cpu); - rq = blk_mq_get_request(q, NULL, rw, &alloc_data); + rq = blk_mq_get_request(q, NULL, op, &alloc_data); blk_queue_exit(q); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3f2c22a42df6..3077714250ce 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -202,10 +202,10 @@ enum { BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ }; -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, +struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, unsigned int flags); -struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op, - unsigned int flags, unsigned int hctx_idx); +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, + unsigned int op, unsigned int flags, unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); enum { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0deed7274a7f..e21dd893ee86 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -935,7 +935,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); -extern struct request *blk_get_request(struct request_queue *, int, gfp_t); +extern struct request *blk_get_request(struct request_queue *, unsigned int op, + gfp_t gfp_mask); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, -- cgit v1.2.3 From d280bab305431c1836423f3cd6a5ff0e35a601ef Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:40 -0700 Subject: block: Introduce request_queue.initialize_rq_fn() Several block drivers need to initialize the driver-private request data after having called 
blk_get_request() and before .prep_rq_fn() is called, e.g. when submitting a REQ_OP_SCSI_* request. Avoid having to repeat that initialization code after every blk_get_request() call by adding new callback functions to struct request_queue and to struct blk_mq_ops. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 17 +++++++++++++---- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 4 ++++ 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 21f6f1020303..09989028616f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1372,12 +1372,21 @@ static struct request *blk_old_get_request(struct request_queue *q, struct request *blk_get_request(struct request_queue *q, unsigned int op, gfp_t gfp_mask) { - if (q->mq_ops) - return blk_mq_alloc_request(q, op, + struct request *req; + + if (q->mq_ops) { + req = blk_mq_alloc_request(q, op, (gfp_mask & __GFP_DIRECT_RECLAIM) ? 0 : BLK_MQ_REQ_NOWAIT); - else - return blk_old_get_request(q, op, gfp_mask); + if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) + q->mq_ops->initialize_rq_fn(req); + } else { + req = blk_old_get_request(q, op, gfp_mask); + if (!IS_ERR(req) && q->initialize_rq_fn) + q->initialize_rq_fn(req); + } + + return req; } EXPORT_SYMBOL(blk_get_request); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3077714250ce..366b83cee955 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -144,6 +144,8 @@ struct blk_mq_ops { init_request_fn *init_request; exit_request_fn *exit_request; reinit_request_fn *reinit_request; + /* Called from inside blk_get_request() */ + void (*initialize_rq_fn)(struct request *rq); map_queues_fn *map_queues; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e21dd893ee86..9a36164487d0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -410,8 +410,12 @@ struct request_queue { rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + /* Called just after a request is allocated */ init_rq_fn *init_rq_fn; + /* Called just before a request is freed */ exit_rq_fn *exit_rq_fn; + /* Called from inside blk_get_request() */ + void (*initialize_rq_fn)(struct request *rq); const struct blk_mq_ops *mq_ops; -- cgit v1.2.3 From ca18d6f769d22e931d3ba1e8d1ae81953547a417 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:41 -0700 Subject: block: Make most scsi_req_init() calls implicit Instead of explicitly calling scsi_req_init() after blk_get_request(), call that function from inside blk_get_request(). Add an .initialize_rq_fn() callback function to the block drivers that need it. Merge the IDE .init_rq_fn() function into .initialize_rq_fn() because it is too small to keep it as a separate function. Keep the scsi_req_init() call in ide_prep_sense() because it follows a blk_rq_init() call.
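Sketch of what a converted driver looks like (illustration only: every mydrv_* name is invented, modelled on the IDE and SCSI hunks of this patch, and note that scsi_req_init() still takes a struct request * at this point in the series):

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_request.h>

struct mydrv_request {
	struct scsi_request sreq;	/* must be first so scsi_req() works */
	u8 sense[SCSI_SENSE_BUFFERSIZE];
};

/* runs inside blk_get_request(), once, instead of after every call site */
static void mydrv_initialize_rq(struct request *rq)
{
	struct mydrv_request *mreq = blk_mq_rq_to_pdu(rq);

	scsi_req_init(rq);
	mreq->sreq.sense = mreq->sense;
}

static void mydrv_setup_queue(struct request_queue *q)
{
	q->cmd_size = sizeof(struct mydrv_request);
	q->initialize_rq_fn = mydrv_initialize_rq;	/* legacy-path hook */
}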
References: commit 82ed4db499b8 ("block: split scsi_request out of struct request") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Nicholas Bellinger Signed-off-by: Jens Axboe --- block/bsg.c | 1 - block/scsi_ioctl.c | 3 --- drivers/block/pktcdvd.c | 1 - drivers/cdrom/cdrom.c | 1 - drivers/ide/ide-atapi.c | 1 - drivers/ide/ide-cd.c | 1 - drivers/ide/ide-cd_ioctl.c | 1 - drivers/ide/ide-devsets.c | 1 - drivers/ide/ide-disk.c | 1 - drivers/ide/ide-ioctls.c | 2 -- drivers/ide/ide-park.c | 2 -- drivers/ide/ide-pm.c | 2 -- drivers/ide/ide-probe.c | 6 +++--- drivers/ide/ide-tape.c | 1 - drivers/ide/ide-taskfile.c | 1 - drivers/scsi/osd/osd_initiator.c | 2 -- drivers/scsi/osst.c | 1 - drivers/scsi/scsi_error.c | 1 - drivers/scsi/scsi_lib.c | 15 ++++++++++++++- drivers/scsi/scsi_transport_sas.c | 2 ++ drivers/scsi/sg.c | 2 -- drivers/scsi/st.c | 1 - drivers/target/target_core_pscsi.c | 2 -- fs/nfsd/blocklayout.c | 1 - include/scsi/scsi_cmnd.h | 1 + 25 files changed, 20 insertions(+), 33 deletions(-) (limited to 'block') diff --git a/block/bsg.c b/block/bsg.c index 59d02dd31b0c..37663b664666 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -236,7 +236,6 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) rq = blk_get_request(q, op, GFP_KERNEL); if (IS_ERR(rq)) return rq; - scsi_req_init(rq); ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); if (ret) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 4a294a5f7fab..f96c51f5df40 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -326,7 +326,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, if (IS_ERR(rq)) return PTR_ERR(rq); req = scsi_req(rq); - scsi_req_init(rq); if (hdr->cmd_len > BLK_MAX_CDB) { req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); @@ -456,7 +455,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, goto error_free_buffer; } req = scsi_req(rq); - scsi_req_init(rq); cmdlen = COMMAND_SIZE(opcode); @@ -542,7 +540,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - scsi_req_init(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; scsi_req(rq)->cmd[0] = cmd; scsi_req(rq)->cmd[4] = data; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 26c04baae967..8ef703ccc4b6 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -708,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - scsi_req_init(rq); if (cgc->buflen) { ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index ff19cfc587f0..e36d160c458f 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2201,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, break; } req = scsi_req(rq); - scsi_req_init(rq); ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); if (ret) { diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index d7a49dcfa85e..37f61acf5a35 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, int error; rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_MISC; 
rq->special = (char *)pc; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index d55e44ed82b5..81e18f9628d0 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, rq = blk_get_request(drive->queue, write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB); ide_req(rq)->type = ATA_PRIV_PC; rq->rq_flags |= rq_flags; diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 55cd736c39c6..9d26c9737e21 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) int ret; rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_MISC; rq->rq_flags = RQF_QUIET; blk_execute_rq(drive->queue, cd->disk, rq, 0); diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 9b69c32ee560..ef7c8c43a380 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, return setting->set(drive, arg); rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd_len = 5; scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7c06237f3479..241983da5fc4 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg) return -EBUSY; rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_TASKFILE; drive->mult_req = arg; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 8c0d17297a7a..3661abb16a5f 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) struct request *rq; rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_TASKFILE; blk_execute_rq(drive->queue, NULL, rq, 0); err = scsi_req(rq)->result ? -EIO : 0; @@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive) int ret = 0; rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd_len = 1; scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET; diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 94e3107f59b9..1f264d5d3f3f 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) spin_unlock_irq(&hwif->lock); rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; @@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) * timeout has expired, so power management will be reenabled. 
*/ rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT); - scsi_req_init(rq); if (IS_ERR(rq)) goto out; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 08b54bb3b705..544f02d673ca 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; @@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_PM_RESUME; rq->rq_flags |= RQF_PREEMPT; rq->special = &rqpm; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index b3f85250dea9..c60e5ffc9231 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif) } } -static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp) +static void ide_initialize_rq(struct request *rq) { struct ide_request *req = blk_mq_rq_to_pdu(rq); + scsi_req_init(rq); req->sreq.sense = req->sense; - return 0; } /* @@ -771,7 +771,7 @@ static int ide_init_queue(ide_drive_t *drive) return 1; q->request_fn = do_ide_request; - q->init_rq_fn = ide_init_rq; + q->initialize_rq_fn = ide_initialize_rq; q->cmd_size = sizeof(struct ide_request); queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); if (blk_init_allocated_queue(q) < 0) { diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 4d062c568777..fd57e8ccc47a 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) BUG_ON(size < 0 || size % tape->blk_size); rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_MISC; scsi_req(rq)->cmd[13] = cmd; rq->rq_disk = tape->disk; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index ab1a32cdcb0a..4efe4c6e956c 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq = blk_get_request(drive->queue, (cmd->tf_flags & IDE_TFLAG_WRITE) ? 
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); - scsi_req_init(rq); ide_req(rq)->type = ATA_PRIV_TASKFILE; /* diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 1e69a43b279d..ca45bf6d2bdb 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1574,7 +1574,6 @@ static struct request *_make_request(struct request_queue *q, bool has_write, flags); if (IS_ERR(req)) return req; - scsi_req_init(req); for_each_bio(bio) { struct bio *bounce_bio = bio; @@ -1619,7 +1618,6 @@ static int _init_blk_request(struct osd_request *or, ret = PTR_ERR(req); goto out; } - scsi_req_init(req); or->in.req = or->request->next_rq = req; } } else if (has_in) diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index d54689c9216e..929ee7e88120 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, return DRIVER_ERROR << 24; rq = scsi_req(req); - scsi_req_init(req); req->rq_flags |= RQF_QUIET; SRpnt->bio = NULL; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 44904f41924c..304a7158540f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) if (IS_ERR(req)) return; rq = scsi_req(req); - scsi_req_init(req); rq->cmd[0] = ALLOW_MEDIUM_REMOVAL; rq->cmd[1] = 0; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fb18ed284e55..301a7f706c9a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -250,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, if (IS_ERR(req)) return ret; rq = scsi_req(req); - scsi_req_init(req); if (bufflen && blk_rq_map_kern(sdev->request_queue, req, buffer, bufflen, __GFP_RECLAIM)) @@ -1117,6 +1116,18 @@ err_exit: } EXPORT_SYMBOL(scsi_init_io); +/** + * scsi_initialize_rq - initialize struct scsi_cmnd.req + * + * Called from inside blk_get_request(). + */ +void scsi_initialize_rq(struct request *rq) +{ + scsi_req_init(rq); +} +EXPORT_SYMBOL(scsi_initialize_rq); + +/* Called after a request has been started. 
*/ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) { void *buf = cmd->sense_buffer; @@ -2124,6 +2135,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) q->request_fn = scsi_request_fn; q->init_rq_fn = scsi_init_rq; q->exit_rq_fn = scsi_exit_rq; + q->initialize_rq_fn = scsi_initialize_rq; if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); @@ -2148,6 +2160,7 @@ static const struct blk_mq_ops scsi_mq_ops = { #endif .init_request = scsi_init_request, .exit_request = scsi_exit_request, + .initialize_rq_fn = scsi_initialize_rq, .map_queues = scsi_map_queues, }; diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index cc970c811bcb..a190c052cd93 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) q = blk_alloc_queue(GFP_KERNEL); if (!q) return -ENOMEM; + q->initialize_rq_fn = scsi_initialize_rq; q->cmd_size = sizeof(struct scsi_request); if (rphy) { diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index f3387c6089c5..21225d62b0c1 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) } req = scsi_req(rq); - scsi_req_init(rq); - if (hp->cmd_len > BLK_MAX_CDB) req->cmd = long_cmdp; memcpy(req->cmd, cmd, hp->cmd_len); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 6b1c4ac54e66..8e5013d9cad4 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, if (IS_ERR(req)) return DRIVER_ERROR << 24; rq = scsi_req(req); - scsi_req_init(req); req->rq_flags |= RQF_QUIET; mdata->null_mapped = 1; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 323ab47645d0..ceec0211e84e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd) goto fail; } - scsi_req_init(req); - if (sgl) { ret = pscsi_map_sg(cmd, sgl, sgl_nents, req); if (ret) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 47ed19c53f2e..c862c2489df0 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -232,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, goto out_free_buf; } req = scsi_req(rq); - scsi_req_init(rq); error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); if (error) diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index b379f93a2c48..da9bf2bcdf1a 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -166,6 +166,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, extern void scsi_kunmap_atomic_sg(void *virt); extern int scsi_init_io(struct scsi_cmnd *cmd); +extern void scsi_initialize_rq(struct request *rq); extern int scsi_dma_map(struct scsi_cmnd *cmd); extern void scsi_dma_unmap(struct scsi_cmnd *cmd); -- cgit v1.2.3 From c8d9cf22cf0f89d1249a57ade5a1949c62075ce6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:42 -0700 Subject: block: Change argument type of scsi_req_init() Since scsi_req_init() works on a struct scsi_request, change the argument type into struct scsi_request *. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 10 +++++++--- drivers/ide/ide-atapi.c | 2 +- drivers/ide/ide-probe.c | 2 +- drivers/scsi/scsi_lib.c | 4 +++- include/scsi/scsi_request.h | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index f96c51f5df40..7440de44dd85 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -741,10 +741,14 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode, } EXPORT_SYMBOL(scsi_cmd_blk_ioctl); -void scsi_req_init(struct request *rq) +/** + * scsi_req_init - initialize certain fields of a scsi_request structure + * @req: Pointer to a scsi_request structure. + * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members + * of struct scsi_request. + */ +void scsi_req_init(struct scsi_request *req) { - struct scsi_request *req = scsi_req(rq); - memset(req->__cmd, 0, sizeof(req->__cmd)); req->cmd = req->__cmd; req->cmd_len = BLK_MAX_CDB; diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 37f61acf5a35..14d1e7d9a1d6 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -199,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) memset(sense, 0, sizeof(*sense)); blk_rq_init(rq->q, sense_rq); - scsi_req_init(sense_rq); + scsi_req_init(req); err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, GFP_NOIO); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index c60e5ffc9231..01b2adfd8226 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -745,7 +745,7 @@ static void ide_initialize_rq(struct request *rq) { struct ide_request *req = blk_mq_rq_to_pdu(rq); - scsi_req_init(rq); + scsi_req_init(&req->sreq); req->sreq.sense = req->sense; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 301a7f706c9a..550e29f903b7 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1123,7 +1123,9 @@ EXPORT_SYMBOL(scsi_init_io); */ void scsi_initialize_rq(struct request *rq) { - scsi_req_init(rq); + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); + + scsi_req_init(&cmd->req); } EXPORT_SYMBOL(scsi_initialize_rq); diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h index f0c76f9dc285..e0afa445ee4e 100644 --- a/include/scsi/scsi_request.h +++ b/include/scsi/scsi_request.h @@ -27,6 +27,6 @@ static inline void scsi_req_free_cmd(struct scsi_request *req) kfree(req->cmd); } -void scsi_req_init(struct request *); +void scsi_req_init(struct scsi_request *req); #endif /* _SCSI_SCSI_REQUEST_H */ -- cgit v1.2.3 From c3a148d20affcc334348402865169a61000d3905 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:43 -0700 Subject: blk-mq: Initialize .rq_flags in blk_mq_rq_ctx_init() Initialization of blk-mq requests is a bit weird: blk_mq_rq_ctx_init() is called after a value has been assigned to .rq_flags and .rq_flags is initialized in __blk_mq_finish_request(). Initialize .rq_flags in blk_mq_rq_ctx_init() instead of relying on __blk_mq_finish_request(). Moving the initialization of .rq_flags is fine because all changes and tests of .rq_flags occur between blk_get_request() and finishing a request. 
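The reasoning can be modelled in miniature outside the kernel: when recycled objects are scrubbed on hand-out rather than on release, a forgotten cleanup in a release path can no longer leak state into the next user. Userspace sketch, all names invented:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define POOL_SIZE 4

struct req {
	unsigned int rq_flags;
	bool in_use;
};

static struct req pool[POOL_SIZE];

static struct req *req_alloc(void)
{
	for (int i = 0; i < POOL_SIZE; i++) {
		if (!pool[i].in_use) {
			pool[i].in_use = true;
			/* initialize on hand-out, as blk_mq_rq_ctx_init() now does */
			pool[i].rq_flags = 0;
			return &pool[i];
		}
	}
	return NULL;
}

static void req_free(struct req *r)
{
	r->in_use = false;	/* no flag clearing required here any more */
}

int main(void)
{
	struct req *r = req_alloc();

	r->rq_flags |= 0x4;	/* request picks up state while in flight */
	req_free(r);
	r = req_alloc();	/* the same slot is recycled... */
	assert(r->rq_flags == 0);	/* ...without stale flags */
	return 0;
}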
Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 2d21fbccc3a5..6268380c680f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -228,6 +228,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; + rq->rq_flags = 0; + if (data->flags & BLK_MQ_REQ_INTERNAL) { rq->tag = -1; rq->internal_tag = tag; @@ -423,7 +425,6 @@ void blk_mq_free_request(struct request *rq) atomic_dec(&hctx->nr_active); wbt_done(q->rq_wb, &rq->issue_stat); - rq->rq_flags = 0; clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); -- cgit v1.2.3 From 2fff8a924d4c614b5a17b2a236a2cf09aa51af5f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:45 -0700 Subject: block: Check locking assumptions at runtime Instead of documenting the locking assumptions of most block layer functions as a comment, use lockdep_assert_held() to verify locking assumptions at runtime. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 71 +++++++++++++++++++++++++++++++++++------------------ block/blk-flush.c | 8 +++--- block/blk-merge.c | 3 +++ block/blk-tag.c | 15 +++++------ block/blk-timeout.c | 4 ++- 5 files changed, 64 insertions(+), 37 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 09989028616f..5f87788249ce 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -236,10 +236,12 @@ static void blk_delay_work(struct work_struct *work) * Description: * Sometimes queueing needs to be postponed for a little while, to allow * resources to come back. This function will make sure that queueing is - * restarted around the specified time. Queue lock must be held. + * restarted around the specified time. */ void blk_delay_queue(struct request_queue *q, unsigned long msecs) { + lockdep_assert_held(q->queue_lock); + if (likely(!blk_queue_dead(q))) queue_delayed_work(kblockd_workqueue, &q->delay_work, msecs_to_jiffies(msecs)); @@ -257,6 +259,8 @@ EXPORT_SYMBOL(blk_delay_queue); **/ void blk_start_queue_async(struct request_queue *q) { + lockdep_assert_held(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); blk_run_queue_async(q); } @@ -269,10 +273,11 @@ EXPORT_SYMBOL(blk_start_queue_async); * Description: * blk_start_queue() will clear the stop flag on the queue, and call * the request_fn for the queue if it was in a stopped state when - * entered. Also see blk_stop_queue(). Queue lock must be held. + * entered. Also see blk_stop_queue(). **/ void blk_start_queue(struct request_queue *q) { + lockdep_assert_held(q->queue_lock); WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); @@ -292,10 +297,12 @@ EXPORT_SYMBOL(blk_start_queue); * or if it simply chooses not to queue more I/O at one point, it can * call this function to prevent the request_fn from being called until * the driver has signalled it's ready to go again. This happens by calling - * blk_start_queue() to restart queue operations. Queue lock must be held. + * blk_start_queue() to restart queue operations. 
**/ void blk_stop_queue(struct request_queue *q) { + lockdep_assert_held(q->queue_lock); + cancel_delayed_work(&q->delay_work); queue_flag_set(QUEUE_FLAG_STOPPED, q); } @@ -348,6 +355,8 @@ EXPORT_SYMBOL(blk_sync_queue); */ inline void __blk_run_queue_uncond(struct request_queue *q) { + lockdep_assert_held(q->queue_lock); + if (unlikely(blk_queue_dead(q))) return; @@ -369,11 +378,12 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); * @q: The queue to run * * Description: - * See @blk_run_queue. This variant must be called with the queue lock - * held and interrupts disabled. + * See @blk_run_queue. */ void __blk_run_queue(struct request_queue *q) { + lockdep_assert_held(q->queue_lock); + if (unlikely(blk_queue_stopped(q))) return; @@ -387,10 +397,17 @@ EXPORT_SYMBOL(__blk_run_queue); * * Description: * Tells kblockd to perform the equivalent of @blk_run_queue on behalf - * of us. The caller must hold the queue lock. + * of us. + * + * Note: + * Since it is not allowed to run q->delay_work after blk_cleanup_queue() + * has canceled q->delay_work, callers must hold the queue lock to avoid + * race conditions between blk_cleanup_queue() and blk_run_queue_async(). */ void blk_run_queue_async(struct request_queue *q) { + lockdep_assert_held(q->queue_lock); + if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); } @@ -1136,6 +1153,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op, int may_queue; req_flags_t rq_flags = RQF_ALLOCED; + lockdep_assert_held(q->queue_lock); + if (unlikely(blk_queue_dying(q))) return ERR_PTR(-ENODEV); @@ -1309,6 +1328,8 @@ static struct request *get_request(struct request_queue *q, unsigned int op, struct request_list *rl; struct request *rq; + lockdep_assert_held(q->queue_lock); + rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: rq = __get_request(rl, op, bio, gfp_mask); @@ -1402,6 +1423,8 @@ EXPORT_SYMBOL(blk_get_request); */ void blk_requeue_request(struct request_queue *q, struct request *rq) { + lockdep_assert_held(q->queue_lock); + blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); @@ -1476,9 +1499,6 @@ static void blk_pm_put_request(struct request *rq) static inline void blk_pm_put_request(struct request *rq) {} #endif -/* - * queue lock must be held - */ void __blk_put_request(struct request_queue *q, struct request *req) { req_flags_t rq_flags = req->rq_flags; @@ -1491,6 +1511,8 @@ void __blk_put_request(struct request_queue *q, struct request *req) return; } + lockdep_assert_held(q->queue_lock); + blk_pm_put_request(req); elv_completed_request(q, req); @@ -2327,9 +2349,6 @@ EXPORT_SYMBOL_GPL(blk_insert_cloned_request); * * Return: * The number of bytes to fail. - * - * Context: - * queue_lock must be held. */ unsigned int blk_rq_err_bytes(const struct request *rq) { @@ -2469,15 +2488,14 @@ void blk_account_io_start(struct request *rq, bool new_io) * Return: * Pointer to the request at the top of @q if available. Null * otherwise. - * - * Context: - * queue_lock must be held. */ struct request *blk_peek_request(struct request_queue *q) { struct request *rq; int ret; + lockdep_assert_held(q->queue_lock); + while ((rq = __elv_next_request(q)) != NULL) { rq = blk_pm_peek_request(q, rq); @@ -2593,12 +2611,11 @@ void blk_dequeue_request(struct request *rq) * * Block internal functions which don't want to start timer should * call blk_dequeue_request(). - * - * Context: - * queue_lock must be held. 
*/ void blk_start_request(struct request *req) { + lockdep_assert_held(req->q->queue_lock); + blk_dequeue_request(req); if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { @@ -2623,14 +2640,13 @@ EXPORT_SYMBOL(blk_start_request); * Return: * Pointer to the request at the top of @q if available. Null * otherwise. - * - * Context: - * queue_lock must be held. */ struct request *blk_fetch_request(struct request_queue *q) { struct request *rq; + lockdep_assert_held(q->queue_lock); + rq = blk_peek_request(q); if (rq) blk_start_request(rq); @@ -2776,13 +2792,12 @@ void blk_unprep_request(struct request *req) } EXPORT_SYMBOL_GPL(blk_unprep_request); -/* - * queue lock must be held - */ void blk_finish_request(struct request *req, blk_status_t error) { struct request_queue *q = req->q; + lockdep_assert_held(req->q->queue_lock); + if (req->rq_flags & RQF_STATS) blk_stat_add(req); @@ -2864,6 +2879,8 @@ static bool blk_end_bidi_request(struct request *rq, blk_status_t error, static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { + lockdep_assert_held(rq->q->queue_lock); + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) return true; @@ -2930,6 +2947,8 @@ EXPORT_SYMBOL(blk_end_request_all); bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes) { + lockdep_assert_held(rq->q->queue_lock); + return __blk_end_bidi_request(rq, error, nr_bytes, 0); } EXPORT_SYMBOL(__blk_end_request); @@ -2947,6 +2966,8 @@ void __blk_end_request_all(struct request *rq, blk_status_t error) bool pending; unsigned int bidi_bytes = 0; + lockdep_assert_held(rq->q->queue_lock); + if (unlikely(blk_bidi_rq(rq))) bidi_bytes = blk_rq_bytes(rq->next_rq); @@ -3211,6 +3232,8 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, bool from_schedule) __releases(q->queue_lock) { + lockdep_assert_held(q->queue_lock); + trace_block_unplug(q, depth, !from_schedule); if (from_schedule) diff --git a/block/blk-flush.c b/block/blk-flush.c index a572b47fa059..ed5fe322abba 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -346,6 +346,8 @@ static void flush_data_end_io(struct request *rq, blk_status_t error) struct request_queue *q = rq->q; struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); + lockdep_assert_held(q->queue_lock); + /* * Updating q->in_flight[] here for making this tag usable * early. Because in blk_queue_start_tag(), @@ -411,9 +413,6 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) * or __blk_mq_run_hw_queue() to dispatch request. * @rq is being submitted. Analyze what needs to be done and put it on the * right queue. - * - * CONTEXT: - * spin_lock_irq(q->queue_lock) in !mq case */ void blk_insert_flush(struct request *rq) { @@ -422,6 +421,9 @@ void blk_insert_flush(struct request *rq) unsigned int policy = blk_flush_policy(fflags, rq); struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); + if (!q->mq_ops) + lockdep_assert_held(q->queue_lock); + /* * @policy now records what operations need to be done. Adjust * REQ_PREFLUSH and FUA for the driver. 
diff --git a/block/blk-merge.c b/block/blk-merge.c index cea544ec5d96..5df13041b851 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -648,6 +648,9 @@ static void blk_account_io_merge(struct request *req) static struct request *attempt_merge(struct request_queue *q, struct request *req, struct request *next) { + if (!q->mq_ops) + lockdep_assert_held(q->queue_lock); + if (!rq_mergeable(req) || !rq_mergeable(next)) return NULL; diff --git a/block/blk-tag.c b/block/blk-tag.c index 07cc329fa4b0..2290f65b9d73 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -258,15 +258,14 @@ EXPORT_SYMBOL(blk_queue_resize_tags); * all transfers have been done for a request. It's important to call * this function before end_that_request_last(), as that will put the * request back on the free list thus corrupting the internal tag list. - * - * Notes: - * queue lock must be held. **/ void blk_queue_end_tag(struct request_queue *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; unsigned tag = rq->tag; /* negative tags invalid */ + lockdep_assert_held(q->queue_lock); + BUG_ON(tag >= bqt->real_max_depth); list_del_init(&rq->queuelist); @@ -307,9 +306,6 @@ EXPORT_SYMBOL(blk_queue_end_tag); * calling this function. The request will also be removed from * the request queue, so it's the drivers responsibility to readd * it if it should need to be restarted for some reason. - * - * Notes: - * queue lock must be held. **/ int blk_queue_start_tag(struct request_queue *q, struct request *rq) { @@ -317,6 +313,8 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) unsigned max_depth; int tag; + lockdep_assert_held(q->queue_lock); + if (unlikely((rq->rq_flags & RQF_QUEUED))) { printk(KERN_ERR "%s: request %p for device [%s] already tagged %d", @@ -389,14 +387,13 @@ EXPORT_SYMBOL(blk_queue_start_tag); * Hardware conditions may dictate a need to stop all pending requests. * In this case, we will safely clear the block side of the tag queue and * readd all requests to the request queue in the right order. - * - * Notes: - * queue lock must be held. **/ void blk_queue_invalidate_tags(struct request_queue *q) { struct list_head *tmp, *n; + lockdep_assert_held(q->queue_lock); + list_for_each_safe(tmp, n, &q->tag_busy_list) blk_requeue_request(q, list_entry_rq(tmp)); } diff --git a/block/blk-timeout.c b/block/blk-timeout.c index cbff183f3d9f..17ec83bb0900 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -189,13 +189,15 @@ unsigned long blk_rq_timeout(unsigned long timeout) * Notes: * Each request has its own timer, and as it is added to the queue, we * set up the timer. When the request completes, we cancel the timer. - * Queue lock must be held for the non-mq case, mq case doesn't care. */ void blk_add_timer(struct request *req) { struct request_queue *q = req->q; unsigned long expiry; + if (!q->mq_ops) + lockdep_assert_held(q->queue_lock); + /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */ if (!q->mq_ops && !q->rq_timed_out_fn) return; -- cgit v1.2.3 From 332ebbf7f9efb31ffc363b99da548963ee3fd66d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:46 -0700 Subject: block: Document what queue type each function is intended for Some functions in block/blk-core.c must only be used on blk-sq queues while others are safe to use against any queue type. Document which functions are intended for blk-sq queues and issue a warning if the blk-sq API is misused. 
This not only helps block driver authors but will also make it easier to remove the blk-sq code once that code is declared obsolete. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 33 +++++++++++++++++++++++++++++++++ block/blk.h | 2 ++ 2 files changed, 35 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 5f87788249ce..2e02314ea331 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -241,6 +241,7 @@ static void blk_delay_work(struct work_struct *work) void blk_delay_queue(struct request_queue *q, unsigned long msecs) { lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); if (likely(!blk_queue_dead(q))) queue_delayed_work(kblockd_workqueue, &q->delay_work, @@ -260,6 +261,7 @@ EXPORT_SYMBOL(blk_delay_queue); void blk_start_queue_async(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); queue_flag_clear(QUEUE_FLAG_STOPPED, q); blk_run_queue_async(q); @@ -279,6 +281,7 @@ void blk_start_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); WARN_ON(!irqs_disabled()); + WARN_ON_ONCE(q->mq_ops); queue_flag_clear(QUEUE_FLAG_STOPPED, q); __blk_run_queue(q); @@ -302,6 +305,7 @@ EXPORT_SYMBOL(blk_start_queue); void blk_stop_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); cancel_delayed_work(&q->delay_work); queue_flag_set(QUEUE_FLAG_STOPPED, q); @@ -356,6 +360,7 @@ EXPORT_SYMBOL(blk_sync_queue); inline void __blk_run_queue_uncond(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); if (unlikely(blk_queue_dead(q))) return; @@ -383,6 +388,7 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); void __blk_run_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); if (unlikely(blk_queue_stopped(q))) return; @@ -407,6 +413,7 @@ EXPORT_SYMBOL(__blk_run_queue); void blk_run_queue_async(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); @@ -425,6 +432,8 @@ void blk_run_queue(struct request_queue *q) { unsigned long flags; + WARN_ON_ONCE(q->mq_ops); + spin_lock_irqsave(q->queue_lock, flags); __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); @@ -453,6 +462,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) int i; lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); while (true) { bool drain = false; @@ -531,6 +541,8 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) */ void blk_queue_bypass_start(struct request_queue *q) { + WARN_ON_ONCE(q->mq_ops); + spin_lock_irq(q->queue_lock); q->bypass_depth++; queue_flag_set(QUEUE_FLAG_BYPASS, q); @@ -557,6 +569,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_start); * @q: queue of interest * * Leave bypass mode and restore the normal queueing behavior. + * + * Note: although blk_queue_bypass_start() is only called for blk-sq queues, + * this function is called for both blk-sq and blk-mq queues.
*/ void blk_queue_bypass_end(struct request_queue *q) { @@ -954,6 +969,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); int blk_init_allocated_queue(struct request_queue *q) { + WARN_ON_ONCE(q->mq_ops); + q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); if (!q->fq) return -ENOMEM; @@ -1091,6 +1108,8 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr) struct request_list *rl; int on_thresh, off_thresh; + WARN_ON_ONCE(q->mq_ops); + spin_lock_irq(q->queue_lock); q->nr_requests = nr; blk_queue_congestion_threshold(q); @@ -1329,6 +1348,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op, struct request *rq; lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: @@ -1373,6 +1393,8 @@ static struct request *blk_old_get_request(struct request_queue *q, { struct request *rq; + WARN_ON_ONCE(q->mq_ops); + /* create ioc upfront */ create_io_context(gfp_mask, q->node); @@ -1424,6 +1446,7 @@ EXPORT_SYMBOL(blk_get_request); void blk_requeue_request(struct request_queue *q, struct request *rq) { lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); blk_delete_timer(rq); blk_clear_rq_complete(rq); @@ -2495,6 +2518,7 @@ struct request *blk_peek_request(struct request_queue *q) int ret; lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); while ((rq = __elv_next_request(q)) != NULL) { @@ -2615,6 +2639,7 @@ void blk_dequeue_request(struct request *rq) void blk_start_request(struct request *req) { lockdep_assert_held(req->q->queue_lock); + WARN_ON_ONCE(req->q->mq_ops); blk_dequeue_request(req); @@ -2646,6 +2671,7 @@ struct request *blk_fetch_request(struct request_queue *q) struct request *rq; lockdep_assert_held(q->queue_lock); + WARN_ON_ONCE(q->mq_ops); rq = blk_peek_request(q); if (rq) @@ -2797,6 +2823,7 @@ void blk_finish_request(struct request *req, blk_status_t error) struct request_queue *q = req->q; lockdep_assert_held(req->q->queue_lock); + WARN_ON_ONCE(q->mq_ops); if (req->rq_flags & RQF_STATS) blk_stat_add(req); @@ -2851,6 +2878,8 @@ static bool blk_end_bidi_request(struct request *rq, blk_status_t error, struct request_queue *q = rq->q; unsigned long flags; + WARN_ON_ONCE(q->mq_ops); + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) return true; @@ -2880,6 +2909,7 @@ static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, unsigned int nr_bytes, unsigned int bidi_bytes) { lockdep_assert_held(rq->q->queue_lock); + WARN_ON_ONCE(rq->q->mq_ops); if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) return true; @@ -2906,6 +2936,7 @@ static bool __blk_end_bidi_request(struct request *rq, blk_status_t error, bool blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes) { + WARN_ON_ONCE(rq->q->mq_ops); return blk_end_bidi_request(rq, error, nr_bytes, 0); } EXPORT_SYMBOL(blk_end_request); @@ -2948,6 +2979,7 @@ bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes) { lockdep_assert_held(rq->q->queue_lock); + WARN_ON_ONCE(rq->q->mq_ops); return __blk_end_bidi_request(rq, error, nr_bytes, 0); } @@ -2967,6 +2999,7 @@ void __blk_end_request_all(struct request *rq, blk_status_t error) unsigned int bidi_bytes = 0; lockdep_assert_held(rq->q->queue_lock); + WARN_ON_ONCE(rq->q->mq_ops); if (unlikely(blk_bidi_rq(rq))) bidi_bytes = blk_rq_bytes(rq->next_rq); diff --git a/block/blk.h b/block/blk.h index 83c8e1100525..798691a5e5e9 100644 --- 
a/block/blk.h +++ b/block/blk.h @@ -143,6 +143,8 @@ static inline struct request *__elv_next_request(struct request_queue *q) struct request *rq; struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); + WARN_ON_ONCE(q->mq_ops); + while (1) { if (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); -- cgit v1.2.3 From 7b6078146ccbe9bd165d578586b10ea092ac489e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:47 -0700 Subject: blk-mq: Document locking assumptions Document the locking assumptions in functions that modify blk_mq_ctx.rq_list to make it easier for humans to verify this code. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 2 ++ block/blk-mq.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'block') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 9f025289da63..191bf82d185e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -150,6 +150,8 @@ static bool blk_mq_attempt_merge(struct request_queue *q, struct request *rq; int checked = 8; + lockdep_assert_held(&ctx->lock); + list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { bool merged = false; diff --git a/block/blk-mq.c b/block/blk-mq.c index 6268380c680f..1d8050e49a94 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1317,6 +1317,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, { struct blk_mq_ctx *ctx = rq->mq_ctx; + lockdep_assert_held(&ctx->lock); + trace_block_rq_insert(hctx->queue, rq); if (at_head) @@ -1330,6 +1332,8 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, { struct blk_mq_ctx *ctx = rq->mq_ctx; + lockdep_assert_held(&ctx->lock); + __blk_mq_insert_req_list(hctx, rq, at_head); blk_mq_hctx_mark_pending(hctx, ctx); } -- cgit v1.2.3 From edf8ff558887364714e115d396b2d06949cf1e07 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:48 -0700 Subject: block: Constify disk_type The variable 'disk_type' is never modified so constify it. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/genhd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/genhd.c b/block/genhd.c index d252d29fe837..7f520fa25d16 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -36,7 +36,7 @@ struct kobject *block_depr; static DEFINE_SPINLOCK(ext_devt_lock); static DEFINE_IDR(ext_devt_idr); -static struct device_type disk_type; +static const struct device_type disk_type; static void disk_check_events(struct disk_events *ev, unsigned int *clearing_ptr); @@ -1183,7 +1183,7 @@ static char *block_devnode(struct device *dev, umode_t *mode, return NULL; } -static struct device_type disk_type = { +static const struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, -- cgit v1.2.3 From 5435c023b92ee1cfb896d924d28c6d31d8386aa0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:49 -0700 Subject: blk-mq: Warn when attempting to run a hardware queue that is not mapped A queue must be frozen while the mapped state of a hardware queue is changed. Additionally, any change of the mapped state is followed by a call to blk_mq_map_swqueue() (see also blk_mq_init_allocated_queue() and blk_mq_update_nr_hw_queues()). 
Since blk_mq_map_swqueue() does not map any unmapped hardware queue onto any software queue, no attempt will be made to run an unmapped hardware queue. Hence issue a warning upon attempts to run an unmapped hardware queue. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 1d8050e49a94..1c4f1f4978c6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1129,8 +1129,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, unsigned long msecs) { - if (unlikely(blk_mq_hctx_stopped(hctx) || - !blk_mq_hw_queue_mapped(hctx))) + if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx))) + return; + + if (unlikely(blk_mq_hctx_stopped(hctx))) return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { @@ -1295,7 +1297,7 @@ static void blk_mq_run_work_fn(struct work_struct *work) void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) { - if (unlikely(!blk_mq_hw_queue_mapped(hctx))) + if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx))) return; /* -- cgit v1.2.3 From e29387ebd86e903702422a8361fd3e03aca25573 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 21 Jun 2017 09:40:11 -0700 Subject: block: Add fallthrough markers to switch statements This patch suppresses gcc 7 warnings about falling through in switch statements when building with W=1. From the gcc documentation: The -Wimplicit-fallthrough=3 warning is enabled by -Wextra. See also https://gcc.gnu.org/onlinedocs/gcc-7.1.0/gcc/Warning-Options.html. Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/badblocks.c | 1 + block/elevator.c | 1 + block/ioprio.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/badblocks.c b/block/badblocks.c index 6ebcef282314..43c71166e1e2 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -533,6 +533,7 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, case 3: if (newline != '\n') return -EINVAL; + /* fall through */ case 2: if (length <= 0) return -EINVAL; diff --git a/block/elevator.c b/block/elevator.c index dac99fbfc273..4bb2f0c93fa6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -681,6 +681,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) */ if (elv_attempt_insert_merge(q, rq)) break; + /* fall through */ case ELEVATOR_INSERT_SORT: BUG_ON(blk_rq_is_passthrough(rq)); rq->rq_flags |= RQF_SORTED; diff --git a/block/ioprio.c b/block/ioprio.c index 4b120c9cf7e8..6f5d0b6625e3 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -75,7 +75,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) case IOPRIO_CLASS_RT: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - /* fall through, rt has prio field too */ + /* fall through */ + /* rt has prio field too */ case IOPRIO_CLASS_BE: if (data >= IOPRIO_BE_NR || data < 0) return -EINVAL; -- cgit v1.2.3 From e0fc443a8643d4c9b330a637a1e6a422a44dde2a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 21 Jun 2017 10:55:45 -0700 Subject: block: Declare local symbols static Avoid that building with W=1 causes the compiler to complain that a declaration for bounce_bio_set and bounce_bio_split is missing. 
References: commit a8821f3f32be ("block: Improvements to bounce-buffer handling") Signed-off-by: Bart Van Assche Cc: Neil Brown Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Signed-off-by: Jens Axboe --- block/bounce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/bounce.c b/block/bounce.c index 17d77613c471..916ee9a9a216 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -26,7 +26,7 @@ #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 -struct bio_set *bounce_bio_set, *bounce_bio_split; +static struct bio_set *bounce_bio_set, *bounce_bio_split; static mempool_t *page_pool, *isa_page_pool; #if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL) -- cgit v1.2.3 From 34bd9c1c4f62e936d8865e6442f332cd85bdfc95 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 21 Jun 2017 10:55:46 -0700 Subject: block: Fix off-by-one errors in blk_status_to_errno() and print_req_error() This was detected by the smatch static analyzer. Fixes: commit 2a842acab109 ("block: introduce new block status code type") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 2e02314ea331..3c18ea60cb1c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -169,7 +169,7 @@ int blk_status_to_errno(blk_status_t status) { int idx = (__force int)status; - if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors))) + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) return -EIO; return blk_errors[idx].errno; } @@ -179,7 +179,7 @@ static void print_req_error(struct request *req, blk_status_t status) { int idx = (__force int)status; - if (WARN_ON_ONCE(idx > ARRAY_SIZE(blk_errors))) + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors))) return; printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n", -- cgit v1.2.3 From 852ec80983d682dc08a0573d37eeaa9814c4f6b1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 21 Jun 2017 10:55:47 -0700 Subject: blk-mq: Make it safe to quiesce and unquiesce from an interrupt handler Since blk_mq_quiesce_queue_nowait() can be called from interrupt context, make this safe. Since this function is not in the hot path, uninline it. Fixes: commit f4560ffe8cec ("blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue") Signed-off-by: Bart Van Assche Cc: Ming Lei Cc: Hannes Reinecke Cc: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-mq.c | 20 ++++++++++++++++++-- include/linux/blk-mq.h | 10 +--------- 2 files changed, 19 insertions(+), 11 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 1c4f1f4978c6..2caac30e128a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -153,6 +153,20 @@ void blk_mq_unfreeze_queue(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); +/* + * FIXME: replace the scsi_internal_device_*block_nowait() calls in the + * mpt3sas driver such that this function can be removed. + */ +void blk_mq_quiesce_queue_nowait(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + queue_flag_set(QUEUE_FLAG_QUIESCED, q); + spin_unlock_irqrestore(q->queue_lock, flags); +} +EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); + /** * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished * @q: request queue. 
@@ -190,9 +204,11 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); */ void blk_mq_unquiesce_queue(struct request_queue *q) { - spin_lock_irq(q->queue_lock); + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); queue_flag_clear(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irq(q->queue_lock); + spin_unlock_irqrestore(q->queue_lock, flags); /* dispatch requests which are inserted during quiescing */ blk_mq_run_hw_queues(q, true); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 366b83cee955..23d32ff0b462 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -266,15 +266,7 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); -/* - * FIXME: this helper is just for working around mpt3sas. - */ -static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - queue_flag_set(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irq(q->queue_lock); -} +void blk_mq_quiesce_queue_nowait(struct request_queue *q); /* * Driver command data is immediately after the request. So subtract request -- cgit v1.2.3 From a9590fe148c03cb4157b56255357419cb4e14124 Mon Sep 17 00:00:00 2001 From: weiping Date: Thu, 22 Jun 2017 23:06:56 +0800 Subject: blk-mq: remove double set queue_num hwctx's queue_num has already been set prior to the call to blk_mq_init_hctx(), so there is no need to set it again. Signed-off-by: weiping Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 - 1 file changed, 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 2caac30e128a..c56b64ae1741 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1910,7 +1910,6 @@ static int blk_mq_init_hctx(struct request_queue *q, spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; - hctx->queue_num = hctx_idx; hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); -- cgit v1.2.3 From cb6934f8ea1a595902ca37e250e0917d4dd7b2a7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jun 2017 09:22:02 -0600 Subject: block: add support for write hints in a bio No functional changes in this patch; we just use up some holes in the bio and request structures to define a write hint that we pass down the stack. Ensure that we don't merge requests that have different lifetime hints assigned to them, and that we inherit the write hint when cloning a bio. Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 2 ++ block/blk-core.c | 1 + block/blk-merge.c | 14 ++++++++++++++ include/linux/blk_types.h | 1 + include/linux/blkdev.h | 2 ++ 5 files changed, 20 insertions(+) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index 89a51bd49ab7..9cf98b29588a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -596,6 +596,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_bdev = bio_src->bi_bdev; bio_set_flag(bio, BIO_CLONED); bio->bi_opf = bio_src->bi_opf; + bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; @@ -679,6 +680,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, return NULL; bio->bi_bdev = bio_src->bi_bdev; bio->bi_opf = bio_src->bi_opf; + bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; diff --git a/block/blk-core.c b/block/blk-core.c index 3c18ea60cb1c..af393d5a9680 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1765,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio) req->ioprio = ioc->ioprio; else req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); + req->write_hint = bio->bi_write_hint; blk_rq_bio_prep(req->q, req, bio); } EXPORT_SYMBOL_GPL(blk_init_request_from_bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index 5df13041b851..99038830fb42 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -672,6 +672,13 @@ static struct request *attempt_merge(struct request_queue *q, !blk_write_same_mergeable(req->bio, next->bio)) return NULL; + /* + * Don't allow merge of different write hints, or for a hint with + * non-hint IO. + */ + if (req->write_hint != next->write_hint) + return NULL; + /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn @@ -791,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) !blk_write_same_mergeable(rq->bio, bio)) return false; + /* + * Don't allow merge of different write hints, or for a hint with + * non-hint IO. + */ + if (rq->write_hint != bio->bi_write_hint) + return false; + return true; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e210da6d14b8..d2eb87c84d82 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -56,6 +56,7 @@ struct bio { */ unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; + unsigned short bi_write_hint; struct bvec_iter bi_iter; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bf2157141d53..0eebd3bcfd85 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -225,6 +225,8 @@ struct request { unsigned int extra_len; /* length of alignment and padding */ + unsigned short write_hint; + unsigned long deadline; struct list_head timeout_list; -- cgit v1.2.3 From f793dfd3f39a3dc50468b06498606b3a906f42f1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Jun 2017 08:15:27 -0600 Subject: blk-mq: expose write hints through debugfs Useful to verify that things are working the way they should. Reading the file will return number of kb written with each write hint. Writing the file will reset the statistics. No care is taken to ensure that we don't race on updates. Drivers will write to q->write_hints[] if they handle a given write hint. Reviewed-by: Andreas Dilger Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 24 ++++++++++++++++++++++++ include/linux/blkdev.h | 3 +++ 2 files changed, 27 insertions(+) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9edebbdce0bd..9ebc2945f991 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -135,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) } } +static int queue_write_hint_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + int i; + + for (i = 0; i < BLK_MAX_WRITE_HINTS; i++) + seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]); + + return 0; +} + +static ssize_t queue_write_hint_store(void *data, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct request_queue *q = data; + int i; + + for (i = 0; i < BLK_MAX_WRITE_HINTS; i++) + q->write_hints[i] = 0; + + return count; +} + static int queue_poll_stat_show(void *data, struct seq_file *m) { struct request_queue *q = data; @@ -730,6 +753,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { {"poll_stat", 0400, queue_poll_stat_show}, {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops}, {"state", 0600, queue_state_show, queue_state_write}, + {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store}, {}, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0eebd3bcfd85..e1e289ab66b9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -596,6 +596,9 @@ struct request_queue { void *rq_alloc_data; struct work_struct release_work; + +#define BLK_MAX_WRITE_HINTS 5 + u64 write_hints[BLK_MAX_WRITE_HINTS]; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From caa4b02476e31fc7933d2138062f7f355d3cd8f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jun 2017 12:13:21 -0600 Subject: blk-map: call blk_queue_bounce from blk_rq_append_bio This moves the knowledge about bouncing out of the callers and into the block core (just like we do for the normal I/O path), and allows us to unexport blk_queue_bounce. 
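For context, a minimal before/after sketch of a passthrough caller, abridged from the osd_initiator hunk below (error handling trimmed):

	/* before: every caller had to bounce the bio itself */
	struct bio *bounce_bio = bio;

	blk_queue_bounce(req->q, &bounce_bio);
	ret = blk_rq_append_bio(req, bounce_bio);

	/* after: blk_rq_append_bio() takes care of bouncing internally */
	ret = blk_rq_append_bio(req, bio);
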
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-map.c | 7 +++---- block/bounce.c | 2 -- drivers/scsi/osd/osd_initiator.c | 5 +---- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/blk-map.c b/block/blk-map.c index 3b5cb863318f..2547016aa7aa 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -16,6 +16,8 @@ */ int blk_rq_append_bio(struct request *rq, struct bio *bio) { + blk_queue_bounce(rq->q, &bio); + if (!rq->bio) { blk_rq_bio_prep(rq->q, rq, bio); } else { @@ -72,15 +74,13 @@ static int __blk_rq_map_user_iov(struct request *rq, map_data->offset += bio->bi_iter.bi_size; orig_bio = bio; - blk_queue_bounce(q, &bio); /* * We link the bounce buffer in and could have to traverse it * later so we have to get a ref to prevent it from being freed */ - bio_get(bio); - ret = blk_rq_append_bio(rq, bio); + bio_get(bio); if (ret) { bio_endio(bio); __blk_rq_unmap_user(orig_bio); @@ -249,7 +249,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, return ret; } - blk_queue_bounce(q, &rq->bio); return 0; } EXPORT_SYMBOL(blk_rq_map_kern); diff --git a/block/bounce.c b/block/bounce.c index 916ee9a9a216..27c5cc0f1ed5 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -284,5 +284,3 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) */ __blk_queue_bounce(q, bio_orig, pool); } - -EXPORT_SYMBOL(blk_queue_bounce); diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index ca45bf6d2bdb..a4f28b7e4c65 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1576,10 +1576,7 @@ static struct request *_make_request(struct request_queue *q, bool has_write, return req; for_each_bio(bio) { - struct bio *bounce_bio = bio; - - blk_queue_bounce(req->q, &bounce_bio); - ret = blk_rq_append_bio(req, bounce_bio); + ret = blk_rq_append_bio(req, bio); if (ret) return ERR_PTR(ret); } -- cgit v1.2.3 From 3bce016a4c5975e4279bfb3cbd6d0332b856cc72 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:21 +0200 Subject: block: move bounce declarations to block/blk.h Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk.h | 13 +++++++++++++ block/bounce.c | 1 + include/linux/blkdev.h | 13 ------------- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'block') diff --git a/block/blk.h b/block/blk.h index 798691a5e5e9..01ebb8185f6b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -336,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { } static inline void blk_throtl_stat_add(struct request *rq, u64 time) { } #endif +#ifdef CONFIG_BOUNCE +extern int init_emergency_isa_pool(void); +extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); +#else +static inline int init_emergency_isa_pool(void) +{ + return 0; +} +static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) +{ +} +#endif /* CONFIG_BOUNCE */ + #endif /* BLK_INTERNAL_H */ diff --git a/block/bounce.c b/block/bounce.c index 27c5cc0f1ed5..36ba44491703 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -22,6 +22,7 @@ #include #include +#include "blk.h" #define POOL_SIZE 64 #define ISA_POOL_SIZE 16 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e1e289ab66b9..e7eef48c97c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -884,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) #define BLK_MIN_SG_TIMEOUT (7 * 
HZ) -#ifdef CONFIG_BOUNCE -extern int init_emergency_isa_pool(void); -extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); -#else -static inline int init_emergency_isa_pool(void) -{ - return 0; -} -static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) -{ -} -#endif /* CONFIG_MMU */ - struct rq_map_data { struct page **pages; int page_order; -- cgit v1.2.3 From 1c4bc3ab9a064d98cdf6de6b44f89d5c3757fa32 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:22 +0200 Subject: block: remove the queue_bounce_pfn helper Only used inside the bounce code, and opencoding it makes it more obvious what is going on. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bounce.c | 6 +++--- include/linux/blkdev.h | 5 ----- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/bounce.c b/block/bounce.c index 36ba44491703..5793c2dc1a15 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -203,7 +203,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, bio_for_each_segment(from, *bio_orig, iter) { if (i++ < BIO_MAX_PAGES) sectors += from.bv_len >> 9; - if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q)) + if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn) bounce = true; } if (!bounce) @@ -220,7 +220,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; - if (page_to_pfn(page) <= queue_bounce_pfn(q)) + if (page_to_pfn(page) <= q->limits.bounce_pfn) continue; to->bv_page = mempool_alloc(pool, q->bounce_gfp); @@ -272,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) * don't waste time iterating over bio segments */ if (!(q->bounce_gfp & GFP_DMA)) { - if (queue_bounce_pfn(q) >= blk_max_pfn) + if (q->limits.bounce_pfn >= blk_max_pfn) return; pool = page_pool; } else { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e7eef48c97c9..25f6a0cb27d3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1385,11 +1385,6 @@ enum blk_default_limits { #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) -static inline unsigned long queue_bounce_pfn(struct request_queue *q) -{ - return q->limits.bounce_pfn; -} - static inline unsigned long queue_segment_boundary(struct request_queue *q) { return q->limits.seg_boundary_mask; -- cgit v1.2.3 From 0b0bcacc3b4300c4bba0bacb4c7a279b2728f331 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:23 +0200 Subject: block: don't bother with bounce limits for make_request drivers We only call blk_queue_bounce for request-based drivers, so stop messing with it for make_request based drivers. 
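As a rough sketch of the resulting split (hypothetical driver callbacks my_request_fn and my_make_request, not from this patch):

	/* request-based driver: the core still applies the conservative
	 * BLK_BOUNCE_HIGH default from blk_init_allocated_queue()
	 */
	q = blk_init_queue(my_request_fn, &my_lock);

	/* bio-based driver: no default bounce limit is applied any more;
	 * the driver is expected to handle highmem pages itself
	 */
	q = blk_alloc_queue(GFP_KERNEL);
	blk_queue_make_request(q, my_make_request);
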
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++++ block/blk-mq.c | 5 +++++ block/blk-settings.c | 5 ----- drivers/block/brd.c | 1 - drivers/block/drbd/drbd_main.c | 1 - drivers/block/rsxx/dev.c | 1 - drivers/nvdimm/blk.c | 1 - drivers/nvdimm/btt.c | 1 - drivers/nvdimm/pmem.c | 1 - 9 files changed, 10 insertions(+), 11 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index af393d5a9680..8699c423fa6e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -989,6 +989,11 @@ int blk_init_allocated_queue(struct request_queue *q) */ blk_queue_make_request(q, blk_queue_bio); + /* + * by default assume old behaviour and bounce for any highmem page + */ + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + q->sg_reserved_size = INT_MAX; /* Protect q->elevator from elevator_change */ diff --git a/block/blk-mq.c b/block/blk-mq.c index 05dfa3f270ae..41e3aeb51c9a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2349,6 +2349,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, blk_queue_make_request(q, blk_mq_make_request); + /* + * by default assume old behaviour and bounce for any highmem page + */ + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + /* * Do this after blk_queue_make_request() overrides it... */ diff --git a/block/blk-settings.c b/block/blk-settings.c index 4fa81ed383ca..be1f115b538b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -172,11 +172,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->nr_batching = BLK_BATCH_REQ; blk_set_default_limits(&q->limits); - - /* - * by default assume old behaviour and bounce for any highmem page - */ - blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); } EXPORT_SYMBOL(blk_queue_make_request); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 57b574f2f66a..6112e99bedf7 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i) blk_queue_make_request(brd->brd_queue, brd_make_request); blk_queue_max_hw_sectors(brd->brd_queue, 1024); - blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); /* This is so fdisk will align partitions on 4k, because of * direct_access API needing 4k alignment, returning a PFN diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 90680034ef57..5fb99e06ebe4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2850,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); - blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); q->queue_lock = &resource->req_lock; device->md_io.page = alloc_page(GFP_KERNEL); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 4e8bdfa0aa31..7f4acebf4657 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -284,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) } blk_queue_make_request(card->queue, rsxx_make_request); - blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 79eb9fb358d5..f12d23c49771 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -273,7 +273,6 @@ static int 
nsblk_attach_disk(struct nd_namespace_blk *nsblk) blk_queue_make_request(q, nd_blk_make_request); blk_queue_max_hw_sectors(q, UINT_MAX); - blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); q->queuedata = nsblk; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 31b2d14e210d..b6ba0618ea46 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1297,7 +1297,6 @@ static int btt_blk_init(struct btt *btt) blk_queue_make_request(btt->btt_queue, btt_make_request); blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); - blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); btt->btt_queue->queuedata = btt; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 7bd383aeea14..6b577afb1d44 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev, blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_max_hw_sectors(q, UINT_MAX); - blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); q->queuedata = pmem; -- cgit v1.2.3 From 46685d1a9521054fa3a7a352f6bb54166cd5b2c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:24 +0200 Subject: blk-mq: don't bounce by default For historical reasons we default to bouncing highmem pages for all block queues. But the blk-mq drivers are easy to audit to ensure that we don't need this - scsi and mtip32xx set explicit limits and everyone else doesn't have any particular ones. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 ----- drivers/block/virtio_blk.c | 3 --- drivers/block/xen-blkfront.c | 3 --- 3 files changed, 11 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 41e3aeb51c9a..05dfa3f270ae 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2349,11 +2349,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, blk_queue_make_request(q, blk_mq_make_request); - /* - * by default assume old behaviour and bounce for any highmem page - */ - blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); - /* * Do this after blk_queue_make_request() overrides it... */ diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index e59bd4549a8a..0297ad7c1452 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -720,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev) /* We can handle whatever the host told us to handle. */ blk_queue_max_segments(q, vblk->sg_elems-2); - /* No need to bounce any requests */ - blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); - /* No real sector limit. */ blk_queue_max_hw_sectors(q, -1U); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac90093fcb25..c852ed3c01d5 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -953,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info) /* Make sure buffer addresses are sector-aligned. */ blk_queue_dma_alignment(rq, 511); - - /* Make sure we don't use bounce buffers. 
*/ - blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); } static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, -- cgit v1.2.3 From 0bf6595ec81d896451f0066c95f8cd34c74313dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:25 +0200 Subject: block: don't set bounce limit in blk_init_allocated_queue And just move it into scsi_transport_sas which needs it due to low-level drivers directly dereferencing bio_data, and into blk_init_queue_node, which will need a further push into the callers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++++----- drivers/scsi/scsi_transport_sas.c | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 8699c423fa6e..33b27541dc17 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -960,6 +960,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return NULL; } + /* + * by default assume old behaviour and bounce for any highmem page + */ + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + return q; } EXPORT_SYMBOL(blk_init_queue_node); @@ -989,11 +994,6 @@ int blk_init_allocated_queue(struct request_queue *q) */ blk_queue_make_request(q, blk_queue_bio); - /* - * by default assume old behaviour and bounce for any highmem page - */ - blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); - q->sg_reserved_size = INT_MAX; /* Protect q->elevator from elevator_change */ diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index a190c052cd93..5006a656e16a 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -251,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) if (error) goto out_cleanup_queue; + /* + * by default assume old behaviour and bounce for any highmem page + */ + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + error = bsg_register_queue(q, dev, name, release); if (error) goto out_cleanup_queue; -- cgit v1.2.3 From 8fc450443e3c489af41a3c9c85b32d38625f2c2a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:26 +0200 Subject: block: don't set bounce limit in blk_init_queue Instead move it to the callers. Those that either don't use bio_data() or page_address() or are specific to architectures that do not support highmem are skipped. 
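The per-driver pattern then looks like this (a sketch following the floppy hunk below; error labels abridged):

	disk->queue = blk_init_queue(do_fd_request, &floppy_lock);
	if (!disk->queue)
		goto out_put_disk;
	/* bouncing for highmem pages is now the driver's explicit choice */
	blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
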
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ----- drivers/block/aoe/aoeblk.c | 1 + drivers/block/floppy.c | 1 + drivers/block/paride/pcd.c | 1 + drivers/block/paride/pd.c | 1 + drivers/block/paride/pf.c | 1 + drivers/block/skd_main.c | 1 + drivers/block/swim.c | 2 ++ drivers/block/swim3.c | 1 + drivers/block/xsysace.c | 1 + drivers/cdrom/gdrom.c | 1 + drivers/mtd/mtd_blkdevs.c | 1 + drivers/sbus/char/jsflash.c | 1 + 13 files changed, 13 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 33b27541dc17..af393d5a9680 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -960,11 +960,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return NULL; } - /* - * by default assume old behaviour and bounce for any highmem page - */ - blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); - return q; } EXPORT_SYMBOL(blk_init_queue_node); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 027b876370bc..6797e6c23c8a 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp) d->aoemajor, d->aoeminor); goto err_mempool; } + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); spin_lock_irqsave(&d->lock, flags); WARN_ON(!(d->flags & DEVFL_GD_NOW)); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9e3cb32e365d..ce823647a9c4 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void) goto out_put_disk; } + blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH); blk_queue_max_hw_sectors(disks[drive]->queue, 64); disks[drive]->major = FLOPPY_MAJOR; disks[drive]->first_minor = TOMINOR(drive); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index cffe42d80ce9..7b8c6368beb7 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -305,6 +305,7 @@ static void pcd_init_units(void) put_disk(disk); continue; } + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); cd->disk = disk; cd->pi = &cd->pia; cd->present = 0; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index c98983be4f9c..27a44b97393a 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk) return; } blk_queue_max_hw_sectors(p->queue, cluster); + blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH); if (disk->drive == -1) { for (disk->drive = 0; disk->drive <= 1; disk->drive++) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 5f46da8d05cd..eef7a91f667d 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -293,6 +293,7 @@ static void __init pf_init_units(void) return; } blk_queue_max_segments(disk->queue, cluster); + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); pf->disk = disk; pf->pi = &pf->pia; pf->media_status = PF_NM; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index e6c526861703..d0368682bd43 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -4273,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev) rc = -ENOMEM; goto err_out; } + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); skdev->queue = q; disk->queue = q; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 1633aaf24060..84434d3ea19b 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd) 
put_disk(swd->unit[drive].disk); goto exit_put_disks; } + blk_queue_bounce_limit(swd->unit[drive].disk->queue, + BLK_BOUNCE_HIGH); swd->unit[drive].disk->queue->queuedata = swd; swd->unit[drive].swd = swd; } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index e3399a138335..9f931f8f6b4c 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev, put_disk(disk); return -ENOMEM; } + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); disk->queue->queuedata = &floppy_states[index]; if (index == 0) { diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 977fdf066017..14459d66ef0c 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace) if (ace->queue == NULL) goto err_blk_initq; blk_queue_logical_block_size(ace->queue, 512); + blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH); /* * Allocate and initialize GD structure diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 53f8278e66f7..6495b03f576c 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -813,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr) err = -ENOMEM; goto probe_fail_requestq; } + blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH); err = probe_gdrom_setupqueue(); if (err) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 91c17fba7659..f336a9b85576 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -417,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) new->rq->queuedata = new; blk_queue_logical_block_size(new->rq, tr->blksize); + blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq); diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 35a69949f92d..14f377ac1280 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -592,6 +592,7 @@ static int jsfd_init(void) put_disk(disk); goto out; } + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); jsfd_disk[i] = disk; } -- cgit v1.2.3 From 13c931bd9a82d1a88e21f5904c5cdb0261b9d53c Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 27 Jun 2017 12:30:47 -0600 Subject: block, bfq: update wr_busy_queues if needed on a queue split This commit fixes a bug triggered by a non-trivial sequence of events. These events are briefly described in the next two paragraphs. The impatient, or those who are familiar with queue merging and splitting, can jump directly to the last paragraph. On each I/O-request arrival for a shared bfq_queue, i.e., for a bfq_queue that is the result of the merge of two or more bfq_queues, BFQ checks whether the shared bfq_queue has become seeky (i.e., if too many random I/O requests have arrived for the bfq_queue; if the device is non-rotational, then random requests must also be small for the bfq_queue to be tagged as seeky). If the shared bfq_queue is actually detected as seeky, then a split occurs: the bfq I/O context of the process that has issued the request is redirected from the shared bfq_queue to a new non-shared bfq_queue. As a degenerate case, if the shared bfq_queue actually happens to be shared only by one process (because of previous splits), then no new bfq_queue is created: the state of the shared bfq_queue is just changed from shared to non-shared. 
Regardless of whether a brand new non-shared bfq_queue is created, or the pre-existing shared bfq_queue is just turned into a non-shared bfq_queue, several parameters of the non-shared bfq_queue are set (restored) to the original values they had when the bfq_queue associated with the bfq I/O context of the process (that has just issued an I/O request) was merged with the shared bfq_queue. One of these parameters is the weight-raising state. If, on the split of a shared bfq_queue, 1) a pre-existing shared bfq_queue is turned into a non-shared bfq_queue, 2) the previously shared bfq_queue happens to be busy, and 3) the weight-raising state of the previously shared bfq_queue happens to change, then the number of weight-raised busy queues changes. The field wr_busy_queues must then be updated accordingly, but such an update was missing. This commit adds the missing update. Reported-by: Luca Miccio Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 60d32700f104..12bbc6b8657d 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -725,8 +725,12 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, } static void -bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) +bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, + struct bfq_io_cq *bic, bool bfq_already_existing) { + unsigned int old_wr_coeff = bfqq->wr_coeff; + bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq); + if (bic->saved_idle_window) bfq_mark_bfqq_idle_window(bfqq); else @@ -754,6 +758,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) /* make sure weight will be updated, however we got here */ bfqq->entity.prio_changed = 1; + + if (likely(!busy)) + return; + + if (old_wr_coeff == 1 && bfqq->wr_coeff > 1) + bfqd->wr_busy_queues++; + else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1) + bfqd->wr_busy_queues--; } static int bfqq_process_refs(struct bfq_queue *bfqq) @@ -4408,7 +4420,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) const int is_sync = rq_is_sync(rq); struct bfq_queue *bfqq; bool new_queue = false; - bool split = false; + bool bfqq_already_existing = false, split = false; if (!rq->elv.icq) return; @@ -4439,6 +4451,8 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, true, is_sync, NULL); + else + bfqq_already_existing = true; } } @@ -4464,7 +4478,8 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio) * queue: restore the idle window and the * possible weight raising period. */ - bfq_bfqq_resume_state(bfqq, bic); + bfq_bfqq_resume_state(bfqq, bfqd, bic, + bfqq_already_existing); } } -- cgit v1.2.3 From fe631457ff3e19e7bb28f4ad65c65726203fdb64 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 29 Jun 2017 08:40:11 -0600 Subject: blk-mq: map all HWQ also in hyperthreaded system This patch performs sequential mapping between CPUs and queues. In case the system has more CPUs than HWQs, there are still CPUs left to map; in a hyperthreaded system, these unmapped CPUs are mapped to the same HWQ as their thread siblings. This fixes a bug where unmapped HWQs were observed on a system with 2 sockets, 18 cores per socket, and 2 threads per core (72 CPUs in total) running NVMEoF (which opens up to a maximum of 64 HWQs). 
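As a worked example (hypothetical topology, all CPUs online): take 8 CPUs arranged as 4 cores with 2 hyperthreads each, sibling pairs (0,4), (1,5), (2,6), (3,7), and nr_queues = 4. CPUs 0-3 hit the sequential branch and get queues 0-3 via cpu % nr_queues; CPUs 4-7 each inherit the queue of their first sibling, so the final mapping is map[] = { 0, 1, 2, 3, 0, 1, 2, 3 } and no HWQ is left unused.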
Performance results running fio (72 jobs, 128 iodepth) using null_blk (w/w.o patch):

bs     IOPS(read submit_queues=72)  IOPS(write submit_queues=72)  IOPS(read submit_queues=24)  IOPS(write submit_queues=24)
-----  ---------------------------  ----------------------------  ---------------------------  ----------------------------
512    4890.4K/4723.5K              4524.7K/4324.2K               4280.2K/4264.3K              3902.4K/3909.5K
1k     4910.1K/4715.2K              4535.8K/4309.6K               4296.7K/4269.1K              3906.8K/3914.9K
2k     4906.3K/4739.7K              4526.7K/4330.6K               4301.1K/4262.4K              3890.8K/3900.1K
4k     4918.6K/4730.7K              4556.1K/4343.6K               4297.6K/4264.5K              3886.9K/3893.9K
8k     4906.4K/4748.9K              4550.9K/4346.7K               4283.2K/4268.8K              3863.4K/3858.2K
16k    4903.8K/4782.6K              4501.5K/4233.9K               4292.3K/4282.3K              3773.1K/3773.5K
32k    4885.8K/4782.4K              4365.9K/4184.2K               4307.5K/4289.4K              3780.3K/3687.3K
64k    4822.5K/4762.7K              2752.8K/2675.1K               4308.8K/4312.3K              2651.5K/2655.7K
128k   2388.5K/2313.8K              1391.9K/1375.7K               2142.8K/2152.2K              1395.5K/1374.2K

Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-cpumap.c | 68 +++++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) (limited to 'block') diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 8e61e8640e17..2cca4fc43f45 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -14,10 +14,15 @@ #include "blk.h" #include "blk-mq.h" -static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues, - const int cpu) +static int cpu_to_queue_index(unsigned int nr_queues, const int cpu, + const struct cpumask *online_mask) { - return cpu * nr_queues / nr_cpus; + /* + * Non online CPU will be mapped to queue index 0. + */ + if (!cpumask_test_cpu(cpu, online_mask)) + return 0; + return cpu % nr_queues; } static int get_first_sibling(unsigned int cpu) @@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set) unsigned int *map = set->mq_map; unsigned int nr_queues = set->nr_hw_queues; const struct cpumask *online_mask = cpu_online_mask; - unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling; - cpumask_var_t cpus; - - if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) - return -ENOMEM; - - cpumask_clear(cpus); - nr_cpus = nr_uniq_cpus = 0; - for_each_cpu(i, online_mask) { - nr_cpus++; - first_sibling = get_first_sibling(i); - if (!cpumask_test_cpu(first_sibling, cpus)) - nr_uniq_cpus++; - cpumask_set_cpu(i, cpus); - } - - queue = 0; - for_each_possible_cpu(i) { - if (!cpumask_test_cpu(i, online_mask)) { - map[i] = 0; - continue; - } + unsigned int cpu, first_sibling; + for_each_possible_cpu(cpu) { /* - * Easy case - we have equal or more hardware queues. Or - * there are no thread siblings to take into account. Do - * 1:1 if enough, or sequential mapping if less. + * First do sequential mapping between CPUs and queues. + * In case we still have CPUs to map, and we have some number of + * threads per cores then map sibling threads to the same queue for + * performace optimizations. */ - if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) { - map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue); - queue++; - continue; + if (cpu < nr_queues) { + map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask); + } else { + first_sibling = get_first_sibling(cpu); + if (first_sibling == cpu) + map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask); + else + map[cpu] = map[first_sibling]; } - - /* - * Less then nr_cpus queues, and we have some number of - * threads per cores. Map sibling threads to the same - * queue. 
- */ - first_sibling = get_first_sibling(i); - if (first_sibling == i) { - map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues, - queue); - queue++; - } else - map[i] = map[first_sibling]; } - free_cpumask_var(cpus); return 0; } EXPORT_SYMBOL_GPL(blk_mq_map_queues); -- cgit v1.2.3
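For readers who want to sanity-check the new mapping outside the kernel, here is a hedged userspace mock of the loop above (the sibling lookup is stubbed and all names are hypothetical; this is not kernel code):

	#include <stdio.h>

	#define NR_CPUS 8
	#define NR_QUEUES 4

	/* stub: pretend CPUs i and i + NR_CPUS/2 are hyperthread siblings */
	static int get_first_sibling(int cpu)
	{
		return cpu % (NR_CPUS / 2);
	}

	int main(void)
	{
		int map[NR_CPUS];
		int cpu;

		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu < NR_QUEUES) {
				/* sequential mapping first */
				map[cpu] = cpu % NR_QUEUES;
			} else {
				int fs = get_first_sibling(cpu);

				/* share the first sibling's queue */
				map[cpu] = (fs == cpu) ? cpu % NR_QUEUES
						       : map[fs];
			}
			printf("cpu %d -> hwq %d\n", cpu, map[cpu]);
		}
		return 0;
	}

On the 8-CPU topology from the worked example above, this prints the mapping { 0, 1, 2, 3, 0, 1, 2, 3 }.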