From 7aef2e780b13973ea60aed8c556107dabde6a495 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Wed, 11 Sep 2013 13:21:07 -0600 Subject: [PATCH 1/7] block: trace all devices plug operation In blk_queue_bio(), trace_block_plug() is called only if the plug list is empty. If a process deals with a single device, that is fine. But if a process deals with multiple devices, only the first device is traced. Using request_count for the check solves this problem. In addition, update the comment. Signed-off-by: Jianpeng Ma Signed-off-by: Jens Axboe --- block/blk-core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 93a18d1d3da8..91037f74668e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1549,11 +1549,9 @@ get_rq: if (plug) { /* * If this is the first request added after a plug, fire - * of a plug trace. If others have been added before, check - * if we have multiple devices in this plug. If so, make a - * note to sort the list before dispatch. + * of a plug trace. */ - if (list_empty(&plug->list)) + if (!request_count) trace_block_plug(q); else { if (request_count >= BLK_MAX_REQUEST_COUNT) { From c1b511eb211a6c72d66f7755d2b30a9a91ef9423 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 29 Aug 2013 15:21:42 -0700 Subject: [PATCH 2/7] block: Convert kmalloc_node(...GFP_ZERO...) to kzalloc_node(...) Use the helper function instead of __GFP_ZERO. 
Signed-off-by: Joe Perches Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- block/deadline-iosched.c | 2 +- block/elevator.c | 2 +- block/genhd.c | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d5bbdcfd0dab..f0468e252ee4 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4358,7 +4358,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) if (!eq) return -ENOMEM; - cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); + cfqd = kzalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node); if (!cfqd) { kobject_put(&eq->kobj); return -ENOMEM; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 20614a332362..9ef66406c625 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -346,7 +346,7 @@ static int deadline_init_queue(struct request_queue *q, struct elevator_type *e) if (!eq) return -ENOMEM; - dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node); + dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); if (!dd) { kobject_put(&eq->kobj); return -ENOMEM; diff --git a/block/elevator.c b/block/elevator.c index 668394d18588..2bcbd8cc14d4 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -155,7 +155,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, { struct elevator_queue *eq; - eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node); + eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); if (unlikely(!eq)) goto err; diff --git a/block/genhd.c b/block/genhd.c index dadf42b454a3..791f41943132 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1252,8 +1252,7 @@ struct gendisk *alloc_disk_node(int minors, int node_id) { struct gendisk *disk; - disk = kmalloc_node(sizeof(struct gendisk), - GFP_KERNEL | __GFP_ZERO, node_id); + disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); if (disk) { if (!init_part_stats(&disk->part0)) { kfree(disk); From 
577cee1e8db6b98b51506e956264b84553426e65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 28 Aug 2013 14:26:50 -0400 Subject: [PATCH 3/7] blkcg: relocate root_blkg setting and clearing Hello, Jens. The original thread can be read from http://thread.gmane.org/gmane.linux.kernel.cgroups/8937 While it leads to oops, given that it only triggers under specific configurations which aren't common. I don't think it's necessary to backport it through -stable and merging it during the coming merge window should be enough. Thanks! ----- 8< ----- Currently, q->root_blkg and q->root_rl.blkg are set from blkcg_activate_policy() and cleared from blkg_destroy_all(). This doesn't necessarily coincide with the lifetime of the root blkcg_gq leading to the following oops when blkcg is enabled but no policy is activated because __blk_queue_next_rl() malfunctions expecting the root_blkg pointers to be set. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __wake_up_common+0x2b/0x90 PGD 60f7a9067 PUD 60f4c9067 PMD 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC gsmi: Log Shutdown Reason 0x03 Modules linked in: act_mirred cls_tcindex cls_prioshift sch_dsmark xt_multiport iptable_mangle sata_mv elephant elephant_dev_num cdc_acm uhci_hcd ehci_hcd i2c_d CPU: 9 PID: 41382 Comm: iSCSI-write- Not tainted 3.11.0-dbg-DEV #19 Hardware name: Intel XXX task: ffff88060d16eec0 ti: ffff88060d170000 task.ti: ffff88060d170000 RIP: 0010:[] [] __wake_up_common+0x2b/0x90 RSP: 0000:ffff88060d171818 EFLAGS: 00010096 RAX: 0000000000000082 RBX: ffff880baa3dee60 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffff880baa3dee60 RBP: ffff88060d171858 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000002 R12: ffff880baa3dee98 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000003 FS: 00007f977cba6700(0000) GS:ffff880c79c60000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 
000000060f7a5000 CR4: 00000000000007e0 Stack: 0000000000000082 0000000000000000 ffff88060d171858 ffff880baa3dee60 0000000000000082 0000000000000003 0000000000000000 0000000000000000 ffff88060d171898 ffffffff810c7848 ffff88060d171888 ffff880bde4bc4b8 Call Trace: [] __wake_up+0x48/0x70 [] __blk_drain_queue+0x123/0x190 [] blk_cleanup_queue+0xf5/0x210 [] __scsi_remove_device+0x5a/0xd0 [] scsi_remove_device+0x34/0x50 [] scsi_remove_target+0x16b/0x220 [] __iscsi_unbind_session+0xd1/0x1b0 [] iscsi_remove_session+0xe2/0x1c0 [] iscsi_destroy_session+0x16/0x60 [] iscsi_session_teardown+0xd9/0x100 [] iscsi_sw_tcp_session_destroy+0x5a/0xb0 [] iscsi_if_rx+0x10e8/0x1560 [] netlink_unicast+0x145/0x200 [] netlink_sendmsg+0x303/0x410 [] sock_sendmsg+0xa6/0xd0 [] ___sys_sendmsg+0x38c/0x3a0 [] ? fget_light+0x40/0x160 [] ? fget_light+0x99/0x160 [] ? fget_light+0x40/0x160 [] __sys_sendmsg+0x49/0x90 [] SyS_sendmsg+0x12/0x20 [] system_call_fastpath+0x16/0x1b Code: 66 66 66 66 90 55 48 89 e5 41 57 41 89 f7 41 56 41 89 ce 41 55 41 54 4c 8d 67 38 53 48 83 ec 18 89 55 c4 48 8b 57 38 4c 89 45 c8 <4c> 8b 2a 48 8d 42 e8 49 Fix it by moving q->root_blkg and q->root_rl.blkg setting to blkg_create() and clearing to blkg_destroy() so that they are initialized when a root blkg is created and cleared when destroyed. 
Reported-and-tested-by: Anatol Pomozov Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 290792a13e3c..db30b6beee72 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -235,8 +235,13 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg->online = true; spin_unlock(&blkcg->lock); - if (!ret) + if (!ret) { + if (blkcg == &blkcg_root) { + q->root_blkg = blkg; + q->root_rl.blkg = blkg; + } return blkg; + } /* @blkg failed fully initialized, use the usual release path */ blkg_put(blkg); @@ -334,6 +339,15 @@ static void blkg_destroy(struct blkcg_gq *blkg) if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) rcu_assign_pointer(blkcg->blkg_hint, NULL); + /* + * If root blkg is destroyed. Just clear the pointer since root_rl + * does not take reference on root blkg. + */ + if (blkcg == &blkcg_root) { + blkg->q->root_blkg = NULL; + blkg->q->root_rl.blkg = NULL; + } + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. @@ -360,13 +374,6 @@ static void blkg_destroy_all(struct request_queue *q) blkg_destroy(blkg); spin_unlock(&blkcg->lock); } - - /* - * root blkg is destroyed. Just clear the pointer since - * root_rl does not take reference on root blkg. - */ - q->root_blkg = NULL; - q->root_rl.blkg = NULL; } /* @@ -973,8 +980,6 @@ int blkcg_activate_policy(struct request_queue *q, ret = PTR_ERR(blkg); goto out_unlock; } - q->root_blkg = blkg; - q->root_rl.blkg = blkg; list_for_each_entry(blkg, &q->blkg_list, q_node) cnt++; From adbe6991efd36104ac9eaf751993d35eaa7f493a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 29 May 2013 16:29:55 -0600 Subject: [PATCH 4/7] bio-integrity: Fix use of bs->bio_integrity_pool after free This fixes a copy and paste error introduced by 9f060e2231 ("block: Convert integrity to bvec_alloc_bs()"). 
Found by Coverity (CID 1020654). Signed-off-by: Bjorn Helgaas Acked-by: Kent Overstreet Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8fb42916d8a2..45e944fe52a6 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -734,7 +734,7 @@ void bioset_integrity_free(struct bio_set *bs) mempool_destroy(bs->bio_integrity_pool); if (bs->bvec_integrity_pool) - mempool_destroy(bs->bio_integrity_pool); + mempool_destroy(bs->bvec_integrity_pool); } EXPORT_SYMBOL(bioset_integrity_free); From 7652113c2f508b1c8176640dcd034730fe79bc48 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 18 Sep 2013 08:33:55 -0600 Subject: [PATCH 5/7] If the queue is dying then we only call the rq->end_io callout. This leaves bios setup on the request, because the caller assumes when the blk_execute_rq_nowait/blk_execute_rq call has completed that the rq->bios have been cleaned up. This patch has blk_execute_rq_nowait use __blk_end_request_all to free bios and also call rq->end_io. Signed-off-by: Mike Christie Signed-off-by: Jens Axboe --- block/blk-exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-exec.c b/block/blk-exec.c index e70621396129..ae4f27d7944e 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -68,9 +68,9 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { + rq->cmd_flags |= REQ_QUIET; rq->errors = -ENXIO; - if (rq->end_io) - rq->end_io(rq, rq->errors); + __blk_end_request_all(rq, rq->errors); spin_unlock_irq(q->queue_lock); return; } From 75afb352991ff1cd3cf5955bfe611de6d83a0c87 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Sat, 21 Sep 2013 13:57:47 -0600 Subject: [PATCH 6/7] block: Add nr_bios to block_rq_remap tracepoint Adding the number of bios in a remapped request to 'block_rq_remap' tracepoint. 
Request remapper clones bios in a request to track the completion status of each bio. So the number of bios can be useful information for investigation. Related discussions: http://www.redhat.com/archives/dm-devel/2013-August/msg00084.html http://www.redhat.com/archives/dm-devel/2013-September/msg00024.html Signed-off-by: Jun'ichi Nomura Acked-by: Mike Snitzer Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++++++++++ include/trace/events/block.h | 6 ++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2fdb4a451b49..0e6f765aa1f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -862,6 +862,17 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) return blk_queue_get_max_sectors(q, rq->cmd_flags); } +static inline unsigned int blk_rq_count_bios(struct request *rq) +{ + unsigned int nr_bios = 0; + struct bio *bio; + + __rq_for_each_bio(bio, rq) + nr_bios++; + + return nr_bios; +} + /* * Request issue related functions. 
*/ diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 60ae7c3db912..4c2301d2ef1a 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -618,6 +618,7 @@ TRACE_EVENT(block_rq_remap, __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) + __field( unsigned int, nr_bios ) __array( char, rwbs, RWBS_LEN) ), @@ -627,15 +628,16 @@ TRACE_EVENT(block_rq_remap, __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; + __entry->nr_bios = blk_rq_count_bios(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); ), - TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), - (unsigned long long)__entry->old_sector) + (unsigned long long)__entry->old_sector, __entry->nr_bios) ); #endif /* _TRACE_BLOCK_H */ From f3cff25f05f2ac29b2ee355e611b0657482f6f1d Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Sun, 22 Sep 2013 12:43:47 -0600 Subject: [PATCH 7/7] cfq: explicitly use 64bit divide operation for 64bit arguments 'samples' is a 64-bit operand, but the second parameter of do_div() is 32-bit. do_div() silently truncates the high 32 bits and the calculated result is invalid. If the low 32 bits of 'samples' are zero, do_div() crashes the kernel. 
Signed-off-by: Anatol Pomozov Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index f0468e252ee4..51e06ea06a2e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1803,7 +1803,7 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, if (samples) { v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum); - do_div(v, samples); + v = div64_u64(v, samples); } __blkg_prfill_u64(sf, pd, v); return 0;