From 7b457c20d817ec07520f42802a5b4dc0e29e4261 Mon Sep 17 00:00:00 2001 From: Lars Pedersen Date: Thu, 1 Jan 2009 21:19:14 +0100 Subject: [PATCH] block: add completion deadlines for bounded device latency Command queuing may result in requests being ignored by a block device for an extended period of time. This patch adds completion deadlines to the elevator layer that provide an upper bound on the worst-case latency incurred by command queuing. Two new tunables are added to the request queue sysfs: device_expire_sync device_expire_async that specify the maximum time in milliseconds requests can spend in a block device before additional dispatch of requests is blocked. --- block/blk-sysfs.c | 57 +++++++++++++++++++++++++++++++++++++++++ block/elevator.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/blkdev.h | 10 +++++++ 3 files changed, 132 insertions(+), 1 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 304ec73..72bb06d 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -9,6 +9,9 @@ #include "blk.h" +#define ASYNC (0) +#define SYNC (1) + struct queue_sysfs_entry { struct attribute attr; ssize_t (*show)(struct request_queue *, char *); @@ -95,6 +98,46 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) return ret; } +static ssize_t +queue_device_expire_sync_show(struct request_queue *q, char *page) +{ + int max = jiffies_to_msecs(q->max_dev_latency[SYNC]); + + return queue_var_show(max, (page)); +} + +static ssize_t queue_device_expire_sync_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long max; + ssize_t ret = queue_var_store(&max, page, count); + + spin_lock_irq(q->queue_lock); + q->max_dev_latency[SYNC] = msecs_to_jiffies(max); + spin_unlock_irq(q->queue_lock); + return ret; +} + +static ssize_t +queue_device_expire_async_show(struct request_queue *q, char *page) +{ + int max = jiffies_to_msecs(q->max_dev_latency[ASYNC]); + + return queue_var_show(max, 
(page)); +} + +static ssize_t queue_device_expire_async_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long max; + ssize_t ret = queue_var_store(&max, page, count); + + spin_lock_irq(q->queue_lock); + q->max_dev_latency[ASYNC] = msecs_to_jiffies(max); + spin_unlock_irq(q->queue_lock); + return ret; +} + static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) { int max_sectors_kb = q->max_sectors >> 1; @@ -169,6 +212,18 @@ static struct queue_sysfs_entry queue_ra_entry = { .store = queue_ra_store, }; +static struct queue_sysfs_entry queue_max_age_entry = { + .attr = {.name = "device_expire_sync", .mode = S_IRUGO | S_IWUSR }, + .show = queue_device_expire_sync_show, + .store = queue_device_expire_sync_store, +}; + +static struct queue_sysfs_entry queue_max_age_async_entry = { + .attr = {.name = "device_expire_async", .mode = S_IRUGO | S_IWUSR }, + .show = queue_device_expire_async_show, + .store = queue_device_expire_async_store, +}; + static struct queue_sysfs_entry queue_max_sectors_entry = { .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR }, .show = queue_max_sectors_show, @@ -200,6 +255,8 @@ static struct queue_sysfs_entry queue_nomerges_entry = { static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, + &queue_max_age_entry.attr, + &queue_max_age_async_entry.attr, &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_iosched_entry.attr, diff --git a/block/elevator.c b/block/elevator.c index ed6f8f3..dc99790 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -547,6 +547,11 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) * in_flight count again */ if (blk_account_rq(rq)) { + const int sync = rq_is_sync(rq); + + rb_erase(&rq->rb_dispatch_node, &q->dispatch_list[sync]); + RB_CLEAR_NODE(&rq->rb_dispatch_node); + q->in_flight--; if (blk_sorted_rq(rq)) elv_deactivate_rq(q, rq); @@ -718,6 +723,29 @@ void 
elv_add_request(struct request_queue *q, struct request *rq, int where, } EXPORT_SYMBOL(elv_add_request); +static int +__elv_block_next_request(struct request_queue *q, int sync) +{ + struct rb_node *node = rb_first(&q->dispatch_list[sync]); + struct request *rq; + unsigned long j = jiffies, age = 0; + + if (node) { + rq = rb_entry(node, struct request, rb_dispatch_node); + + /* + * if jiffies overflow, we just ignore that small window + */ + if (j < rq->dispatch_time) + return 0; + age = j - rq->dispatch_time; + + if (q->max_dev_latency[sync] && age > q->max_dev_latency[sync]) + return 1; + } + return 0; +} + static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -739,6 +767,9 @@ struct request *elv_next_request(struct request_queue *q) struct request *rq; int ret; + if (__elv_block_next_request(q, 1) || __elv_block_next_request(q, 0)) + return NULL; + while ((rq = __elv_next_request(q)) != NULL) { /* * Kill the empty barrier place holder, the driver must @@ -823,6 +854,29 @@ struct request *elv_next_request(struct request_queue *q) } EXPORT_SYMBOL(elv_next_request); +static void __elv_dequeue_request(struct rb_root *root, struct request *rq) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct request *__rq; + + rq->dispatch_time = jiffies; + + while (*p) { + parent = *p; + + __rq = rb_entry(parent, struct request, rb_dispatch_node); + + if (rq->dispatch_time < __rq->dispatch_time) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&rq->rb_dispatch_node, parent, p); + rb_insert_color(&rq->rb_dispatch_node, root); +} + void elv_dequeue_request(struct request_queue *q, struct request *rq) { BUG_ON(list_empty(&rq->queuelist)); @@ -835,8 +889,13 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) * and to it is freed is accounted as io that is in progress at * the driver side. 
*/ - if (blk_account_rq(rq)) + if (blk_account_rq(rq)) { + const int sync = rq_is_sync(rq); + q->in_flight++; + + __elv_dequeue_request(&q->dispatch_list[sync], rq); + } } EXPORT_SYMBOL(elv_dequeue_request); @@ -909,6 +968,11 @@ void elv_completed_request(struct request_queue *q, struct request *rq) * request is released from the driver, io must be done */ if (blk_account_rq(rq)) { + const int sync = rq_is_sync(rq); + + rb_erase(&rq->rb_dispatch_node, &q->dispatch_list[sync]); + RB_CLEAR_NODE(&rq->rb_dispatch_node); + q->in_flight--; if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53ea933..08a08be 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -182,6 +182,12 @@ struct request { void *elevator_private; void *elevator_private2; + /* + * rb_node for sorted tree of dispatched requests + */ + struct rb_node rb_dispatch_node; + unsigned long dispatch_time; + struct gendisk *rq_disk; unsigned long start_time; @@ -298,6 +304,8 @@ struct request_queue struct request *last_merge; elevator_t *elevator; + struct rb_root dispatch_list[2]; + /* * the queue request freelist, one for reads and one for writes */ @@ -366,6 +374,8 @@ struct request_queue unsigned int nr_congestion_off; unsigned int nr_batching; + unsigned long max_dev_latency[2]; + unsigned int max_sectors; unsigned int max_hw_sectors; unsigned short max_phys_segments; -- 1.6.0.4