mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 17:23:25 -05:00
Merge branch 'for-2.6.37/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/core' of git://git.kernel.dk/linux-2.6-block: (39 commits) cfq-iosched: Fix a gcc 4.5 warning and put some comments block: Turn bvec_k{un,}map_irq() into static inline functions block: fix accounting bug on cross partition merges block: Make the integrity mapped property a bio flag block: Fix double free in blk_integrity_unregister block: Ensure physical block size is unsigned int blkio-throttle: Fix possible multiplication overflow in iops calculations blkio-throttle: limit max iops value to UINT_MAX blkio-throttle: There is no need to convert jiffies to milli seconds blkio-throttle: Fix link failure failure on i386 blkio: Recalculate the throttled bio dispatch time upon throttle limit change blkio: Add root group to td->tg_list blkio: deletion of a cgroup was causes oops blkio: Do not export throttle files if CONFIG_BLK_DEV_THROTTLING=n block: set the bounce_pfn to the actual DMA limit rather than to max memory block: revert bad fix for memory hotplug causing bounces Fix compile error in blk-exec.c for !CONFIG_DETECT_HUNG_TASK block: set the bounce_pfn to the actual DMA limit rather than to max memory block: Prevent hang_check firing during long I/O cfq: improve fsync performance for small files ... Fix up trivial conflicts due to __rcu sparse annotation in include/linux/genhd.h
This commit is contained in:
commit
e9dd2b6837
43 changed files with 2494 additions and 299 deletions
|
@ -8,12 +8,17 @@ both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
|
|||
Plan is to use the same cgroup based management interface for blkio controller
|
||||
and based on user options switch IO policies in the background.
|
||||
|
||||
In the first phase, this patchset implements proportional weight time based
|
||||
division of disk policy. It is implemented in CFQ. Hence this policy takes
|
||||
effect only on leaf nodes when CFQ is being used.
|
||||
Currently two IO control policies are implemented. First one is proportional
|
||||
weight time based division of disk policy. It is implemented in CFQ. Hence
|
||||
this policy takes effect only on leaf nodes when CFQ is being used. The second
|
||||
one is throttling policy which can be used to specify upper IO rate limits
|
||||
on devices. This policy is implemented in generic block layer and can be
|
||||
used on leaf nodes as well as higher level logical devices like device mapper.
|
||||
|
||||
HOWTO
|
||||
=====
|
||||
Proportional Weight division of bandwidth
|
||||
-----------------------------------------
|
||||
You can do a very simple testing of running two dd threads in two different
|
||||
cgroups. Here is what you can do.
|
||||
|
||||
|
@ -55,6 +60,35 @@ cgroups. Here is what you can do.
|
|||
group dispatched to the disk. We provide fairness in terms of disk time, so
|
||||
ideally io.disk_time of cgroups should be in proportion to the weight.
|
||||
|
||||
Throttling/Upper Limit policy
|
||||
-----------------------------
|
||||
- Enable Block IO controller
|
||||
CONFIG_BLK_CGROUP=y
|
||||
|
||||
- Enable throttling in block layer
|
||||
CONFIG_BLK_DEV_THROTTLING=y
|
||||
|
||||
- Mount blkio controller
|
||||
mount -t cgroup -o blkio none /cgroup/blkio
|
||||
|
||||
- Specify a bandwidth rate on particular device for root group. The format
|
||||
for policy is "<major>:<minor> <bytes_per_second>".
|
||||
|
||||
echo "8:16 1048576" > /cgroup/blkio/blkio.throttle.read_bps_device
|
||||
|
||||
Above will put a limit of 1MB/second on reads happening for root group
|
||||
on device having major/minor number 8:16.
|
||||
|
||||
- Run dd to read a file and see if rate is throttled to 1MB/s or not.
|
||||
|
||||
# dd if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
|
||||
# iflag=direct
|
||||
1024+0 records in
|
||||
1024+0 records out
|
||||
4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
|
||||
|
||||
Limits for writes can be put using blkio.throttle.write_bps_device file.
|
||||
|
||||
Various user visible config options
|
||||
===================================
|
||||
CONFIG_BLK_CGROUP
|
||||
|
@ -68,8 +102,13 @@ CONFIG_CFQ_GROUP_IOSCHED
|
|||
- Enables group scheduling in CFQ. Currently only 1 level of group
|
||||
creation is allowed.
|
||||
|
||||
CONFIG_BLK_DEV_THROTTLING
|
||||
- Enable block device throttling support in block layer.
|
||||
|
||||
Details of cgroup files
|
||||
=======================
|
||||
Proportional weight policy files
|
||||
--------------------------------
|
||||
- blkio.weight
|
||||
- Specifies per cgroup weight. This is default weight of the group
|
||||
on all the devices until and unless overridden by per device rule.
|
||||
|
@ -210,6 +249,67 @@ Details of cgroup files
|
|||
and minor number of the device and third field specifies the number
|
||||
of times a group was dequeued from a particular device.
|
||||
|
||||
Throttling/Upper limit policy files
|
||||
-----------------------------------
|
||||
- blkio.throttle.read_bps_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
|
||||
|
||||
- blkio.throttle.write_bps_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
|
||||
|
||||
- blkio.throttle.read_iops_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
|
||||
|
||||
- blkio.throttle.write_iops_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
|
||||
|
||||
Note: If both BW and IOPS rules are specified for a device, then IO is
|
||||
subjected to both the constraints.
|
||||
|
||||
- blkio.throttle.io_serviced
|
||||
- Number of IOs (bio) completed to/from the disk by the group (as
|
||||
seen by throttling policy). These are further divided by the type
|
||||
of operation - read or write, sync or async. First two fields specify
|
||||
the major and minor number of the device, third field specifies the
|
||||
operation type and the fourth field specifies the number of IOs.
|
||||
|
||||
blkio.io_serviced does accounting as seen by CFQ and counts are in
|
||||
number of requests (struct request). On the other hand,
|
||||
blkio.throttle.io_serviced counts number of IO in terms of number
|
||||
of bios as seen by throttling policy. These bios can later be
|
||||
merged by elevator and total number of requests completed can be
|
||||
lesser.
|
||||
|
||||
- blkio.throttle.io_service_bytes
|
||||
- Number of bytes transferred to/from the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
specifies the number of bytes.
|
||||
|
||||
These numbers should roughly be same as blkio.io_service_bytes as
|
||||
updated by CFQ. The difference between two is that
|
||||
blkio.io_service_bytes will not be updated if CFQ is not operating
|
||||
on request queue.
|
||||
|
||||
Common files among various policies
|
||||
-----------------------------------
|
||||
- blkio.reset_stats
|
||||
- Writing an int to this file will result in resetting all the stats
|
||||
for that cgroup.
|
||||
|
|
|
@ -77,6 +77,18 @@ config BLK_DEV_INTEGRITY
|
|||
T10/SCSI Data Integrity Field or the T13/ATA External Path
|
||||
Protection. If in doubt, say N.
|
||||
|
||||
config BLK_DEV_THROTTLING
|
||||
bool "Block layer bio throttling support"
|
||||
depends on BLK_CGROUP=y && EXPERIMENTAL
|
||||
default n
|
||||
---help---
|
||||
Block layer bio throttling support. It can be used to limit
|
||||
the IO rate to a device. IO rate policies are per cgroup and
|
||||
one needs to mount and use blkio cgroup controller for creating
|
||||
cgroups and specifying per device IO rate policies.
|
||||
|
||||
See Documentation/cgroups/blkio-controller.txt for more information.
|
||||
|
||||
endif # BLOCK
|
||||
|
||||
config BLOCK_COMPAT
|
||||
|
|
|
@ -9,6 +9,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
|
|||
|
||||
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
|
||||
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
|
||||
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
|
||||
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
|
||||
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
|
||||
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
|
||||
|
|
|
@ -37,6 +37,12 @@ static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
|
|||
static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
|
||||
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
|
||||
|
||||
/* for encoding cft->private value on file */
|
||||
#define BLKIOFILE_PRIVATE(x, val) (((x) << 16) | (val))
|
||||
/* What policy owns the file, proportional or throttle */
|
||||
#define BLKIOFILE_POLICY(val) (((val) >> 16) & 0xffff)
|
||||
#define BLKIOFILE_ATTR(val) ((val) & 0xffff)
|
||||
|
||||
struct cgroup_subsys blkio_subsys = {
|
||||
.name = "blkio",
|
||||
.create = blkiocg_create,
|
||||
|
@ -59,6 +65,27 @@ static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
|
|||
list_add(&pn->node, &blkcg->policy_list);
|
||||
}
|
||||
|
||||
static inline bool cftype_blkg_same_policy(struct cftype *cft,
|
||||
struct blkio_group *blkg)
|
||||
{
|
||||
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
|
||||
|
||||
if (blkg->plid == plid)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Determines if policy node matches cgroup file being accessed */
|
||||
static inline bool pn_matches_cftype(struct cftype *cft,
|
||||
struct blkio_policy_node *pn)
|
||||
{
|
||||
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
|
||||
int fileid = BLKIOFILE_ATTR(cft->private);
|
||||
|
||||
return (plid == pn->plid && fileid == pn->fileid);
|
||||
}
|
||||
|
||||
/* Must be called with blkcg->lock held */
|
||||
static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
|
||||
{
|
||||
|
@ -67,12 +94,13 @@ static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
|
|||
|
||||
/* Must be called with blkcg->lock held */
|
||||
static struct blkio_policy_node *
|
||||
blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
|
||||
blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
|
||||
enum blkio_policy_id plid, int fileid)
|
||||
{
|
||||
struct blkio_policy_node *pn;
|
||||
|
||||
list_for_each_entry(pn, &blkcg->policy_list, node) {
|
||||
if (pn->dev == dev)
|
||||
if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
|
||||
return pn;
|
||||
}
|
||||
|
||||
|
@ -86,6 +114,67 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
|
||||
|
||||
static inline void
|
||||
blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
|
||||
{
|
||||
struct blkio_policy_type *blkiop;
|
||||
|
||||
list_for_each_entry(blkiop, &blkio_list, list) {
|
||||
/* If this policy does not own the blkg, do not send updates */
|
||||
if (blkiop->plid != blkg->plid)
|
||||
continue;
|
||||
if (blkiop->ops.blkio_update_group_weight_fn)
|
||||
blkiop->ops.blkio_update_group_weight_fn(blkg->key,
|
||||
blkg, weight);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Pass a bytes-per-second value to the policy whose id matches blkg->plid.
 * @fileid selects between the read and the write bps callback; a callback
 * that is not set is skipped.
 */
static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
				int fileid)
{
	struct blkio_policy_type *pol;

	list_for_each_entry(pol, &blkio_list, list) {
		/* Policies that do not own the blkg get no update */
		if (pol->plid == blkg->plid) {
			if (fileid == BLKIO_THROTL_read_bps_device
			    && pol->ops.blkio_update_group_read_bps_fn)
				pol->ops.blkio_update_group_read_bps_fn(
						blkg->key, blkg, bps);

			if (fileid == BLKIO_THROTL_write_bps_device
			    && pol->ops.blkio_update_group_write_bps_fn)
				pol->ops.blkio_update_group_write_bps_fn(
						blkg->key, blkg, bps);
		}
	}
}
|
||||
|
||||
static inline void blkio_update_group_iops(struct blkio_group *blkg,
|
||||
unsigned int iops, int fileid)
|
||||
{
|
||||
struct blkio_policy_type *blkiop;
|
||||
|
||||
list_for_each_entry(blkiop, &blkio_list, list) {
|
||||
|
||||
/* If this policy does not own the blkg, do not send updates */
|
||||
if (blkiop->plid != blkg->plid)
|
||||
continue;
|
||||
|
||||
if (fileid == BLKIO_THROTL_read_iops_device
|
||||
&& blkiop->ops.blkio_update_group_read_iops_fn)
|
||||
blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
|
||||
blkg, iops);
|
||||
|
||||
if (fileid == BLKIO_THROTL_write_iops_device
|
||||
&& blkiop->ops.blkio_update_group_write_iops_fn)
|
||||
blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
|
||||
blkg,iops);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Add to the appropriate stat variable depending on the request type.
|
||||
* This should be called with the blkg->stats_lock held.
|
||||
|
@ -341,7 +430,8 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
|
|||
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
|
||||
|
||||
void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
|
||||
struct blkio_group *blkg, void *key, dev_t dev)
|
||||
struct blkio_group *blkg, void *key, dev_t dev,
|
||||
enum blkio_policy_id plid)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
@ -350,6 +440,7 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
|
|||
rcu_assign_pointer(blkg->key, key);
|
||||
blkg->blkcg_id = css_id(&blkcg->css);
|
||||
hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
|
||||
blkg->plid = plid;
|
||||
spin_unlock_irqrestore(&blkcg->lock, flags);
|
||||
/* Need to take css reference ? */
|
||||
cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
|
||||
|
@ -408,51 +499,6 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
|
||||
|
||||
#define SHOW_FUNCTION(__VAR) \
|
||||
static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup, \
|
||||
struct cftype *cftype) \
|
||||
{ \
|
||||
struct blkio_cgroup *blkcg; \
|
||||
\
|
||||
blkcg = cgroup_to_blkio_cgroup(cgroup); \
|
||||
return (u64)blkcg->__VAR; \
|
||||
}
|
||||
|
||||
SHOW_FUNCTION(weight);
|
||||
#undef SHOW_FUNCTION
|
||||
|
||||
static int
|
||||
blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
|
||||
{
|
||||
struct blkio_cgroup *blkcg;
|
||||
struct blkio_group *blkg;
|
||||
struct hlist_node *n;
|
||||
struct blkio_policy_type *blkiop;
|
||||
struct blkio_policy_node *pn;
|
||||
|
||||
if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
blkcg = cgroup_to_blkio_cgroup(cgroup);
|
||||
spin_lock(&blkio_list_lock);
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
blkcg->weight = (unsigned int)val;
|
||||
|
||||
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
|
||||
pn = blkio_policy_search_node(blkcg, blkg->dev);
|
||||
|
||||
if (pn)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(blkiop, &blkio_list, list)
|
||||
blkiop->ops.blkio_update_group_weight_fn(blkg,
|
||||
blkcg->weight);
|
||||
}
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
spin_unlock(&blkio_list_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
|
||||
{
|
||||
|
@ -593,52 +639,6 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
|
|||
return disk_total;
|
||||
}
|
||||
|
||||
#define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total) \
|
||||
static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
|
||||
struct cftype *cftype, struct cgroup_map_cb *cb) \
|
||||
{ \
|
||||
struct blkio_cgroup *blkcg; \
|
||||
struct blkio_group *blkg; \
|
||||
struct hlist_node *n; \
|
||||
uint64_t cgroup_total = 0; \
|
||||
\
|
||||
if (!cgroup_lock_live_group(cgroup)) \
|
||||
return -ENODEV; \
|
||||
\
|
||||
blkcg = cgroup_to_blkio_cgroup(cgroup); \
|
||||
rcu_read_lock(); \
|
||||
hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
|
||||
if (blkg->dev) { \
|
||||
spin_lock_irq(&blkg->stats_lock); \
|
||||
cgroup_total += blkio_get_stat(blkg, cb, \
|
||||
blkg->dev, type); \
|
||||
spin_unlock_irq(&blkg->stats_lock); \
|
||||
} \
|
||||
} \
|
||||
if (show_total) \
|
||||
cb->fill(cb, "Total", cgroup_total); \
|
||||
rcu_read_unlock(); \
|
||||
cgroup_unlock(); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0);
|
||||
SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0);
|
||||
SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1);
|
||||
SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1);
|
||||
SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1);
|
||||
SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1);
|
||||
SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1);
|
||||
SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1);
|
||||
#ifdef CONFIG_DEBUG_BLK_CGROUP
|
||||
SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0);
|
||||
SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0);
|
||||
SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0);
|
||||
SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0);
|
||||
SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0);
|
||||
#endif
|
||||
#undef SHOW_FUNCTION_PER_GROUP
|
||||
|
||||
static int blkio_check_dev_num(dev_t dev)
|
||||
{
|
||||
int part = 0;
|
||||
|
@ -652,13 +652,14 @@ static int blkio_check_dev_num(dev_t dev)
|
|||
}
|
||||
|
||||
static int blkio_policy_parse_and_set(char *buf,
|
||||
struct blkio_policy_node *newpn)
|
||||
struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
|
||||
{
|
||||
char *s[4], *p, *major_s = NULL, *minor_s = NULL;
|
||||
int ret;
|
||||
unsigned long major, minor, temp;
|
||||
int i = 0;
|
||||
dev_t dev;
|
||||
u64 bps, iops;
|
||||
|
||||
memset(s, 0, sizeof(s));
|
||||
|
||||
|
@ -705,12 +706,47 @@ static int blkio_policy_parse_and_set(char *buf,
|
|||
if (s[1] == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
ret = strict_strtoul(s[1], 10, &temp);
|
||||
if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
|
||||
temp > BLKIO_WEIGHT_MAX)
|
||||
return -EINVAL;
|
||||
switch (plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
ret = strict_strtoul(s[1], 10, &temp);
|
||||
if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
|
||||
temp > BLKIO_WEIGHT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
newpn->weight = temp;
|
||||
newpn->plid = plid;
|
||||
newpn->fileid = fileid;
|
||||
newpn->val.weight = temp;
|
||||
break;
|
||||
case BLKIO_POLICY_THROTL:
|
||||
switch(fileid) {
|
||||
case BLKIO_THROTL_read_bps_device:
|
||||
case BLKIO_THROTL_write_bps_device:
|
||||
ret = strict_strtoull(s[1], 10, &bps);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
newpn->plid = plid;
|
||||
newpn->fileid = fileid;
|
||||
newpn->val.bps = bps;
|
||||
break;
|
||||
case BLKIO_THROTL_read_iops_device:
|
||||
case BLKIO_THROTL_write_iops_device:
|
||||
ret = strict_strtoull(s[1], 10, &iops);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
if (iops > THROTL_IOPS_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
newpn->plid = plid;
|
||||
newpn->fileid = fileid;
|
||||
newpn->val.iops = (unsigned int)iops;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -720,26 +756,180 @@ unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
|
|||
{
|
||||
struct blkio_policy_node *pn;
|
||||
|
||||
pn = blkio_policy_search_node(blkcg, dev);
|
||||
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_weight_device);
|
||||
if (pn)
|
||||
return pn->weight;
|
||||
return pn->val.weight;
|
||||
else
|
||||
return blkcg->weight;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkcg_get_weight);
|
||||
|
||||
uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
|
||||
{
|
||||
struct blkio_policy_node *pn;
|
||||
|
||||
static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
|
||||
const char *buffer)
|
||||
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
|
||||
BLKIO_THROTL_read_bps_device);
|
||||
if (pn)
|
||||
return pn->val.bps;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Look up the write bps rule stored in @blkcg for device @dev. Returns the
 * rule's bps value, or -1 (converted to uint64_t) when no rule is found.
 */
uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *rule;

	rule = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_bps_device);
	if (!rule)
		return -1;

	return rule->val.bps;
}
|
||||
|
||||
/*
 * Look up the read iops rule stored in @blkcg for device @dev. Returns the
 * rule's iops value, or -1 (converted to unsigned int) when no rule is found.
 */
unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *rule;

	rule = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_read_iops_device);
	if (!rule)
		return -1;

	return rule->val.iops;
}
|
||||
|
||||
/*
 * Look up the write iops rule stored in @blkcg for device @dev. Returns the
 * rule's iops value, or -1 (converted to unsigned int) when no rule is found.
 */
unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *rule;

	rule = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
				BLKIO_THROTL_write_iops_device);
	if (!rule)
		return -1;

	return rule->val.iops;
}
|
||||
|
||||
/* Checks whether user asked for deleting a policy rule */
|
||||
static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
|
||||
{
|
||||
switch(pn->plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
if (pn->val.weight == 0)
|
||||
return 1;
|
||||
break;
|
||||
case BLKIO_POLICY_THROTL:
|
||||
switch(pn->fileid) {
|
||||
case BLKIO_THROTL_read_bps_device:
|
||||
case BLKIO_THROTL_write_bps_device:
|
||||
if (pn->val.bps == 0)
|
||||
return 1;
|
||||
break;
|
||||
case BLKIO_THROTL_read_iops_device:
|
||||
case BLKIO_THROTL_write_iops_device:
|
||||
if (pn->val.iops == 0)
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
|
||||
struct blkio_policy_node *newpn)
|
||||
{
|
||||
switch(oldpn->plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
oldpn->val.weight = newpn->val.weight;
|
||||
break;
|
||||
case BLKIO_POLICY_THROTL:
|
||||
switch(newpn->fileid) {
|
||||
case BLKIO_THROTL_read_bps_device:
|
||||
case BLKIO_THROTL_write_bps_device:
|
||||
oldpn->val.bps = newpn->val.bps;
|
||||
break;
|
||||
case BLKIO_THROTL_read_iops_device:
|
||||
case BLKIO_THROTL_write_iops_device:
|
||||
oldpn->val.iops = newpn->val.iops;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Some rules/values in blkg have changed. Propogate those to respective
|
||||
* policies.
|
||||
*/
|
||||
static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
|
||||
struct blkio_group *blkg, struct blkio_policy_node *pn)
|
||||
{
|
||||
unsigned int weight, iops;
|
||||
u64 bps;
|
||||
|
||||
switch(pn->plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
weight = pn->val.weight ? pn->val.weight :
|
||||
blkcg->weight;
|
||||
blkio_update_group_weight(blkg, weight);
|
||||
break;
|
||||
case BLKIO_POLICY_THROTL:
|
||||
switch(pn->fileid) {
|
||||
case BLKIO_THROTL_read_bps_device:
|
||||
case BLKIO_THROTL_write_bps_device:
|
||||
bps = pn->val.bps ? pn->val.bps : (-1);
|
||||
blkio_update_group_bps(blkg, bps, pn->fileid);
|
||||
break;
|
||||
case BLKIO_THROTL_read_iops_device:
|
||||
case BLKIO_THROTL_write_iops_device:
|
||||
iops = pn->val.iops ? pn->val.iops : (-1);
|
||||
blkio_update_group_iops(blkg, iops, pn->fileid);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A policy node rule has been updated. Propogate this update to all the
|
||||
* block groups which might be affected by this update.
|
||||
*/
|
||||
static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
|
||||
struct blkio_policy_node *pn)
|
||||
{
|
||||
struct blkio_group *blkg;
|
||||
struct hlist_node *n;
|
||||
|
||||
spin_lock(&blkio_list_lock);
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
|
||||
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
|
||||
if (pn->dev != blkg->dev || pn->plid != blkg->plid)
|
||||
continue;
|
||||
blkio_update_blkg_policy(blkcg, blkg, pn);
|
||||
}
|
||||
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
spin_unlock(&blkio_list_lock);
|
||||
}
|
||||
|
||||
static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
|
||||
const char *buffer)
|
||||
{
|
||||
int ret = 0;
|
||||
char *buf;
|
||||
struct blkio_policy_node *newpn, *pn;
|
||||
struct blkio_cgroup *blkcg;
|
||||
struct blkio_group *blkg;
|
||||
int keep_newpn = 0;
|
||||
struct hlist_node *n;
|
||||
struct blkio_policy_type *blkiop;
|
||||
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
|
||||
int fileid = BLKIOFILE_ATTR(cft->private);
|
||||
|
||||
buf = kstrdup(buffer, GFP_KERNEL);
|
||||
if (!buf)
|
||||
|
@ -751,7 +941,7 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
|
|||
goto free_buf;
|
||||
}
|
||||
|
||||
ret = blkio_policy_parse_and_set(buf, newpn);
|
||||
ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
|
||||
if (ret)
|
||||
goto free_newpn;
|
||||
|
||||
|
@ -759,9 +949,9 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
|
|||
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
|
||||
pn = blkio_policy_search_node(blkcg, newpn->dev);
|
||||
pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
|
||||
if (!pn) {
|
||||
if (newpn->weight != 0) {
|
||||
if (!blkio_delete_rule_command(newpn)) {
|
||||
blkio_policy_insert_node(blkcg, newpn);
|
||||
keep_newpn = 1;
|
||||
}
|
||||
|
@ -769,33 +959,17 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
|
|||
goto update_io_group;
|
||||
}
|
||||
|
||||
if (newpn->weight == 0) {
|
||||
/* weight == 0 means deleting a specific weight */
|
||||
if (blkio_delete_rule_command(newpn)) {
|
||||
blkio_policy_delete_node(pn);
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
goto update_io_group;
|
||||
}
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
|
||||
pn->weight = newpn->weight;
|
||||
blkio_update_policy_rule(pn, newpn);
|
||||
|
||||
update_io_group:
|
||||
/* update weight for each cfqg */
|
||||
spin_lock(&blkio_list_lock);
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
|
||||
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
|
||||
if (newpn->dev == blkg->dev) {
|
||||
list_for_each_entry(blkiop, &blkio_list, list)
|
||||
blkiop->ops.blkio_update_group_weight_fn(blkg,
|
||||
newpn->weight ?
|
||||
newpn->weight :
|
||||
blkcg->weight);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
spin_unlock(&blkio_list_lock);
|
||||
blkio_update_policy_node_blkg(blkcg, newpn);
|
||||
|
||||
free_newpn:
|
||||
if (!keep_newpn)
|
||||
|
@ -805,23 +979,256 @@ free_buf:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
|
||||
struct seq_file *m)
|
||||
static void
|
||||
blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
|
||||
{
|
||||
switch(pn->plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
if (pn->fileid == BLKIO_PROP_weight_device)
|
||||
seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
|
||||
MINOR(pn->dev), pn->val.weight);
|
||||
break;
|
||||
case BLKIO_POLICY_THROTL:
|
||||
switch(pn->fileid) {
|
||||
case BLKIO_THROTL_read_bps_device:
|
||||
case BLKIO_THROTL_write_bps_device:
|
||||
seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
|
||||
MINOR(pn->dev), pn->val.bps);
|
||||
break;
|
||||
case BLKIO_THROTL_read_iops_device:
|
||||
case BLKIO_THROTL_write_iops_device:
|
||||
seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
|
||||
MINOR(pn->dev), pn->val.iops);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/* cgroup files which read their data from policy nodes end up here */
|
||||
static void blkio_read_policy_node_files(struct cftype *cft,
|
||||
struct blkio_cgroup *blkcg, struct seq_file *m)
|
||||
{
|
||||
struct blkio_cgroup *blkcg;
|
||||
struct blkio_policy_node *pn;
|
||||
|
||||
seq_printf(m, "dev\tweight\n");
|
||||
|
||||
blkcg = cgroup_to_blkio_cgroup(cgrp);
|
||||
if (!list_empty(&blkcg->policy_list)) {
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
list_for_each_entry(pn, &blkcg->policy_list, node) {
|
||||
seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
|
||||
MINOR(pn->dev), pn->weight);
|
||||
if (!pn_matches_cftype(cft, pn))
|
||||
continue;
|
||||
blkio_print_policy_node(m, pn);
|
||||
}
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
|
||||
struct seq_file *m)
|
||||
{
|
||||
struct blkio_cgroup *blkcg;
|
||||
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
|
||||
int name = BLKIOFILE_ATTR(cft->private);
|
||||
|
||||
blkcg = cgroup_to_blkio_cgroup(cgrp);
|
||||
|
||||
switch(plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
switch(name) {
|
||||
case BLKIO_PROP_weight_device:
|
||||
blkio_read_policy_node_files(cft, blkcg, m);
|
||||
return 0;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
break;
|
||||
case BLKIO_POLICY_THROTL:
|
||||
switch(name){
|
||||
case BLKIO_THROTL_read_bps_device:
|
||||
case BLKIO_THROTL_write_bps_device:
|
||||
case BLKIO_THROTL_read_iops_device:
|
||||
case BLKIO_THROTL_write_iops_device:
|
||||
blkio_read_policy_node_files(cft, blkcg, m);
|
||||
return 0;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
|
||||
struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
|
||||
bool show_total)
|
||||
{
|
||||
struct blkio_group *blkg;
|
||||
struct hlist_node *n;
|
||||
uint64_t cgroup_total = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
|
||||
if (blkg->dev) {
|
||||
if (!cftype_blkg_same_policy(cft, blkg))
|
||||
continue;
|
||||
spin_lock_irq(&blkg->stats_lock);
|
||||
cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
|
||||
type);
|
||||
spin_unlock_irq(&blkg->stats_lock);
|
||||
}
|
||||
}
|
||||
if (show_total)
|
||||
cb->fill(cb, "Total", cgroup_total);
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* All map kind of cgroup file get serviced by this function */
|
||||
static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
|
||||
struct cgroup_map_cb *cb)
|
||||
{
|
||||
struct blkio_cgroup *blkcg;
|
||||
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
|
||||
int name = BLKIOFILE_ATTR(cft->private);
|
||||
|
||||
blkcg = cgroup_to_blkio_cgroup(cgrp);
|
||||
|
||||
switch(plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
switch(name) {
|
||||
case BLKIO_PROP_time:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_TIME, 0);
|
||||
case BLKIO_PROP_sectors:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_SECTORS, 0);
|
||||
case BLKIO_PROP_io_service_bytes:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_SERVICE_BYTES, 1);
|
||||
case BLKIO_PROP_io_serviced:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_SERVICED, 1);
|
||||
case BLKIO_PROP_io_service_time:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_SERVICE_TIME, 1);
|
||||
case BLKIO_PROP_io_wait_time:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_WAIT_TIME, 1);
|
||||
case BLKIO_PROP_io_merged:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_MERGED, 1);
|
||||
case BLKIO_PROP_io_queued:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_QUEUED, 1);
|
||||
#ifdef CONFIG_DEBUG_BLK_CGROUP
|
||||
case BLKIO_PROP_dequeue:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_DEQUEUE, 0);
|
||||
case BLKIO_PROP_avg_queue_size:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_AVG_QUEUE_SIZE, 0);
|
||||
case BLKIO_PROP_group_wait_time:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_GROUP_WAIT_TIME, 0);
|
||||
case BLKIO_PROP_idle_time:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_IDLE_TIME, 0);
|
||||
case BLKIO_PROP_empty_time:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_EMPTY_TIME, 0);
|
||||
#endif
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
break;
|
||||
case BLKIO_POLICY_THROTL:
|
||||
switch(name){
|
||||
case BLKIO_THROTL_io_service_bytes:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_SERVICE_BYTES, 1);
|
||||
case BLKIO_THROTL_io_serviced:
|
||||
return blkio_read_blkg_stats(blkcg, cft, cb,
|
||||
BLKIO_STAT_SERVICED, 1);
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Set the cgroup-wide weight of @blkcg to @val.
 *
 * Returns -EINVAL when @val lies outside
 * [BLKIO_WEIGHT_MIN, BLKIO_WEIGHT_MAX].  Otherwise the new weight is
 * stored and pushed, via blkio_update_group_weight(), to every group on
 * blkcg->blkg_list for which blkio_policy_search_node() finds no
 * weight_device rule.  Returns 0 on success.
 *
 * blkio_list_lock is taken first, then blkcg->lock with interrupts
 * disabled; they are released in the reverse order.
 */
static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
{
	struct blkio_group *grp;
	struct hlist_node *pos;

	if (!(val >= BLKIO_WEIGHT_MIN && val <= BLKIO_WEIGHT_MAX))
		return -EINVAL;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	blkcg->weight = (unsigned int)val;

	hlist_for_each_entry(grp, pos, &blkcg->blkg_list, blkcg_node) {
		/* Groups with a weight_device rule keep that rule's weight. */
		if (!blkio_policy_search_node(blkcg, grp->dev,
				BLKIO_POLICY_PROP, BLKIO_PROP_weight_device))
			blkio_update_group_weight(grp, blkcg->weight);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);

	return 0;
}
|
||||
|
||||
static u64 blkiocg_file_read_u64 (struct cgroup *cgrp, struct cftype *cft) {
|
||||
struct blkio_cgroup *blkcg;
|
||||
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
|
||||
int name = BLKIOFILE_ATTR(cft->private);
|
||||
|
||||
blkcg = cgroup_to_blkio_cgroup(cgrp);
|
||||
|
||||
switch(plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
switch(name) {
|
||||
case BLKIO_PROP_weight:
|
||||
return (u64)blkcg->weight;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
|
||||
{
|
||||
struct blkio_cgroup *blkcg;
|
||||
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
|
||||
int name = BLKIOFILE_ATTR(cft->private);
|
||||
|
||||
blkcg = cgroup_to_blkio_cgroup(cgrp);
|
||||
|
||||
switch(plid) {
|
||||
case BLKIO_POLICY_PROP:
|
||||
switch(name) {
|
||||
case BLKIO_PROP_weight:
|
||||
return blkio_weight_write(blkcg, val);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -829,71 +1236,151 @@ static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
|
|||
struct cftype blkio_files[] = {
|
||||
{
|
||||
.name = "weight_device",
|
||||
.read_seq_string = blkiocg_weight_device_read,
|
||||
.write_string = blkiocg_weight_device_write,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_weight_device),
|
||||
.read_seq_string = blkiocg_file_read,
|
||||
.write_string = blkiocg_file_write,
|
||||
.max_write_len = 256,
|
||||
},
|
||||
{
|
||||
.name = "weight",
|
||||
.read_u64 = blkiocg_weight_read,
|
||||
.write_u64 = blkiocg_weight_write,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_weight),
|
||||
.read_u64 = blkiocg_file_read_u64,
|
||||
.write_u64 = blkiocg_file_write_u64,
|
||||
},
|
||||
{
|
||||
.name = "time",
|
||||
.read_map = blkiocg_time_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_time),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "sectors",
|
||||
.read_map = blkiocg_sectors_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_sectors),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "io_service_bytes",
|
||||
.read_map = blkiocg_io_service_bytes_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_io_service_bytes),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "io_serviced",
|
||||
.read_map = blkiocg_io_serviced_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_io_serviced),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "io_service_time",
|
||||
.read_map = blkiocg_io_service_time_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_io_service_time),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "io_wait_time",
|
||||
.read_map = blkiocg_io_wait_time_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_io_wait_time),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "io_merged",
|
||||
.read_map = blkiocg_io_merged_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_io_merged),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "io_queued",
|
||||
.read_map = blkiocg_io_queued_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_io_queued),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "reset_stats",
|
||||
.write_u64 = blkiocg_reset_stats,
|
||||
},
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING
|
||||
{
|
||||
.name = "throttle.read_bps_device",
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
|
||||
BLKIO_THROTL_read_bps_device),
|
||||
.read_seq_string = blkiocg_file_read,
|
||||
.write_string = blkiocg_file_write,
|
||||
.max_write_len = 256,
|
||||
},
|
||||
|
||||
{
|
||||
.name = "throttle.write_bps_device",
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
|
||||
BLKIO_THROTL_write_bps_device),
|
||||
.read_seq_string = blkiocg_file_read,
|
||||
.write_string = blkiocg_file_write,
|
||||
.max_write_len = 256,
|
||||
},
|
||||
|
||||
{
|
||||
.name = "throttle.read_iops_device",
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
|
||||
BLKIO_THROTL_read_iops_device),
|
||||
.read_seq_string = blkiocg_file_read,
|
||||
.write_string = blkiocg_file_write,
|
||||
.max_write_len = 256,
|
||||
},
|
||||
|
||||
{
|
||||
.name = "throttle.write_iops_device",
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
|
||||
BLKIO_THROTL_write_iops_device),
|
||||
.read_seq_string = blkiocg_file_read,
|
||||
.write_string = blkiocg_file_write,
|
||||
.max_write_len = 256,
|
||||
},
|
||||
{
|
||||
.name = "throttle.io_service_bytes",
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
|
||||
BLKIO_THROTL_io_service_bytes),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "throttle.io_serviced",
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
|
||||
BLKIO_THROTL_io_serviced),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
#endif /* CONFIG_BLK_DEV_THROTTLING */
|
||||
|
||||
#ifdef CONFIG_DEBUG_BLK_CGROUP
|
||||
{
|
||||
.name = "avg_queue_size",
|
||||
.read_map = blkiocg_avg_queue_size_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_avg_queue_size),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "group_wait_time",
|
||||
.read_map = blkiocg_group_wait_time_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_group_wait_time),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "idle_time",
|
||||
.read_map = blkiocg_idle_time_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_idle_time),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "empty_time",
|
||||
.read_map = blkiocg_empty_time_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_empty_time),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
{
|
||||
.name = "dequeue",
|
||||
.read_map = blkiocg_dequeue_read,
|
||||
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
|
||||
BLKIO_PROP_dequeue),
|
||||
.read_map = blkiocg_file_read_map,
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
@ -932,13 +1419,14 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
|
|||
/*
|
||||
* This blkio_group is being unlinked as associated cgroup is
|
||||
* going away. Let all the IO controlling policies know about
|
||||
* this event. Currently this is static call to one io
|
||||
* controlling policy. Once we have more policies in place, we
|
||||
* need some dynamic registration of callback function.
|
||||
* this event.
|
||||
*/
|
||||
spin_lock(&blkio_list_lock);
|
||||
list_for_each_entry(blkiop, &blkio_list, list)
|
||||
list_for_each_entry(blkiop, &blkio_list, list) {
|
||||
if (blkiop->plid != blkg->plid)
|
||||
continue;
|
||||
blkiop->ops.blkio_unlink_group_fn(key, blkg);
|
||||
}
|
||||
spin_unlock(&blkio_list_lock);
|
||||
} while (1);
|
||||
|
||||
|
|
|
@ -15,6 +15,14 @@
|
|||
|
||||
#include <linux/cgroup.h>
|
||||
|
||||
/* Identifies the IO control policy that owns a group, rule or cgroup file. */
enum blkio_policy_id {
	BLKIO_POLICY_PROP = 0,		/* Proportional Bandwidth division */
	BLKIO_POLICY_THROTL,		/* Throttling */
};
|
||||
|
||||
/* Max limits for throttle policy */
|
||||
#define THROTL_IOPS_MAX UINT_MAX
|
||||
|
||||
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
|
||||
|
||||
#ifndef CONFIG_BLK_CGROUP
|
||||
|
@ -65,6 +73,35 @@ enum blkg_state_flags {
|
|||
BLKG_empty,
|
||||
};
|
||||
|
||||
/*
 * cgroup files owned by proportional weight policy.
 * Numbering starts at 1; the remaining values follow in order.
 */
enum blkcg_file_name_prop {
	BLKIO_PROP_weight = 1,
	BLKIO_PROP_weight_device,
	BLKIO_PROP_io_service_bytes,
	BLKIO_PROP_io_serviced,
	BLKIO_PROP_time,
	BLKIO_PROP_sectors,
	BLKIO_PROP_io_service_time,
	BLKIO_PROP_io_wait_time,
	BLKIO_PROP_io_merged,
	BLKIO_PROP_io_queued,
	BLKIO_PROP_avg_queue_size,
	BLKIO_PROP_group_wait_time,
	BLKIO_PROP_idle_time,
	BLKIO_PROP_empty_time,
	BLKIO_PROP_dequeue,
};
|
||||
|
||||
/*
 * cgroup files owned by throttle policy.
 * Unlike the proportional weight file ids, numbering here starts at 0.
 */
enum blkcg_file_name_throtl {
	BLKIO_THROTL_read_bps_device = 0,
	BLKIO_THROTL_write_bps_device,
	BLKIO_THROTL_read_iops_device,
	BLKIO_THROTL_write_iops_device,
	BLKIO_THROTL_io_service_bytes,
	BLKIO_THROTL_io_serviced,
};
|
||||
|
||||
struct blkio_cgroup {
|
||||
struct cgroup_subsys_state css;
|
||||
unsigned int weight;
|
||||
|
@ -112,6 +149,8 @@ struct blkio_group {
|
|||
char path[128];
|
||||
/* The device MKDEV(major, minor), this group has been created for */
|
||||
dev_t dev;
|
||||
/* policy which owns this blk group */
|
||||
enum blkio_policy_id plid;
|
||||
|
||||
/* Need to serialize the stats in the case of reset/update */
|
||||
spinlock_t stats_lock;
|
||||
|
@ -121,24 +160,60 @@ struct blkio_group {
|
|||
struct blkio_policy_node {
|
||||
struct list_head node;
|
||||
dev_t dev;
|
||||
unsigned int weight;
|
||||
/* This node belongs to max bw policy or porportional weight policy */
|
||||
enum blkio_policy_id plid;
|
||||
/* cgroup file to which this rule belongs to */
|
||||
int fileid;
|
||||
|
||||
union {
|
||||
unsigned int weight;
|
||||
/*
|
||||
* Rate read/write in terms of byptes per second
|
||||
* Whether this rate represents read or write is determined
|
||||
* by file type "fileid".
|
||||
*/
|
||||
u64 bps;
|
||||
unsigned int iops;
|
||||
} val;
|
||||
};
|
||||
|
||||
extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
|
||||
dev_t dev);
|
||||
extern uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg,
|
||||
dev_t dev);
|
||||
extern uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg,
|
||||
dev_t dev);
|
||||
extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg,
|
||||
dev_t dev);
|
||||
extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
|
||||
dev_t dev);
|
||||
|
||||
typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
|
||||
typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
|
||||
unsigned int weight);
|
||||
|
||||
typedef void (blkio_update_group_weight_fn) (void *key,
|
||||
struct blkio_group *blkg, unsigned int weight);
|
||||
typedef void (blkio_update_group_read_bps_fn) (void * key,
|
||||
struct blkio_group *blkg, u64 read_bps);
|
||||
typedef void (blkio_update_group_write_bps_fn) (void *key,
|
||||
struct blkio_group *blkg, u64 write_bps);
|
||||
typedef void (blkio_update_group_read_iops_fn) (void *key,
|
||||
struct blkio_group *blkg, unsigned int read_iops);
|
||||
typedef void (blkio_update_group_write_iops_fn) (void *key,
|
||||
struct blkio_group *blkg, unsigned int write_iops);
|
||||
|
||||
struct blkio_policy_ops {
|
||||
blkio_unlink_group_fn *blkio_unlink_group_fn;
|
||||
blkio_update_group_weight_fn *blkio_update_group_weight_fn;
|
||||
blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
|
||||
blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
|
||||
blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
|
||||
blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
|
||||
};
|
||||
|
||||
struct blkio_policy_type {
|
||||
struct list_head list;
|
||||
struct blkio_policy_ops ops;
|
||||
enum blkio_policy_id plid;
|
||||
};
|
||||
|
||||
/* Blkio controller policy registration */
|
||||
|
@ -212,7 +287,8 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
|
|||
extern struct blkio_cgroup blkio_root_cgroup;
|
||||
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
|
||||
extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
|
||||
struct blkio_group *blkg, void *key, dev_t dev);
|
||||
struct blkio_group *blkg, void *key, dev_t dev,
|
||||
enum blkio_policy_id plid);
|
||||
extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
|
||||
extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
|
||||
void *key);
|
||||
|
@ -234,7 +310,8 @@ static inline struct blkio_cgroup *
|
|||
cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
|
||||
|
||||
static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
|
||||
struct blkio_group *blkg, void *key, dev_t dev) {}
|
||||
struct blkio_group *blkg, void *key, dev_t dev,
|
||||
enum blkio_policy_id plid) {}
|
||||
|
||||
static inline int
|
||||
blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
|
||||
|
|
|
@ -64,13 +64,15 @@ static void drive_stat_acct(struct request *rq, int new_io)
|
|||
return;
|
||||
|
||||
cpu = part_stat_lock();
|
||||
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
|
||||
|
||||
if (!new_io)
|
||||
if (!new_io) {
|
||||
part = rq->part;
|
||||
part_stat_inc(cpu, part, merges[rw]);
|
||||
else {
|
||||
} else {
|
||||
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
|
||||
part_round_stats(cpu, part);
|
||||
part_inc_in_flight(part, rw);
|
||||
rq->part = part;
|
||||
}
|
||||
|
||||
part_stat_unlock();
|
||||
|
@ -128,6 +130,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
|
|||
rq->ref_count = 1;
|
||||
rq->start_time = jiffies;
|
||||
set_start_time_ns(rq);
|
||||
rq->part = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_init);
|
||||
|
||||
|
@ -382,6 +385,7 @@ void blk_sync_queue(struct request_queue *q)
|
|||
del_timer_sync(&q->unplug_timer);
|
||||
del_timer_sync(&q->timeout);
|
||||
cancel_work_sync(&q->unplug_work);
|
||||
throtl_shutdown_timer_wq(q);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sync_queue);
|
||||
|
||||
|
@ -459,6 +463,8 @@ void blk_cleanup_queue(struct request_queue *q)
|
|||
if (q->elevator)
|
||||
elevator_exit(q->elevator);
|
||||
|
||||
blk_throtl_exit(q);
|
||||
|
||||
blk_put_queue(q);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_cleanup_queue);
|
||||
|
@ -515,6 +521,11 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (blk_throtl_init(q)) {
|
||||
kmem_cache_free(blk_requestq_cachep, q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
|
||||
laptop_mode_timer_fn, (unsigned long) q);
|
||||
init_timer(&q->unplug_timer);
|
||||
|
@ -796,11 +807,16 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
|
|||
rl->starved[is_sync] = 0;
|
||||
|
||||
priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
|
||||
if (priv)
|
||||
if (priv) {
|
||||
rl->elvpriv++;
|
||||
|
||||
if (blk_queue_io_stat(q))
|
||||
rw_flags |= REQ_IO_STAT;
|
||||
/*
|
||||
* Don't do stats for non-priv requests
|
||||
*/
|
||||
if (blk_queue_io_stat(q))
|
||||
rw_flags |= REQ_IO_STAT;
|
||||
}
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
|
||||
|
@ -1522,6 +1538,15 @@ static inline void __generic_make_request(struct bio *bio)
|
|||
goto end_io;
|
||||
}
|
||||
|
||||
blk_throtl_bio(q, &bio);
|
||||
|
||||
/*
|
||||
* If bio = NULL, bio has been throttled and will be submitted
|
||||
* later.
|
||||
*/
|
||||
if (!bio)
|
||||
break;
|
||||
|
||||
trace_block_bio_queue(q, bio);
|
||||
|
||||
ret = q->make_request_fn(q, bio);
|
||||
|
@ -1612,11 +1637,12 @@ void submit_bio(int rw, struct bio *bio)
|
|||
|
||||
if (unlikely(block_dump)) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
|
||||
printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
|
||||
current->comm, task_pid_nr(current),
|
||||
(rw & WRITE) ? "WRITE" : "READ",
|
||||
(unsigned long long)bio->bi_sector,
|
||||
bdevname(bio->bi_bdev, b));
|
||||
bdevname(bio->bi_bdev, b),
|
||||
count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1759,7 +1785,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
|
|||
int cpu;
|
||||
|
||||
cpu = part_stat_lock();
|
||||
part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
|
||||
part = req->part;
|
||||
part_stat_add(cpu, part, sectors[rw], bytes >> 9);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
@ -1779,7 +1805,7 @@ static void blk_account_io_done(struct request *req)
|
|||
int cpu;
|
||||
|
||||
cpu = part_stat_lock();
|
||||
part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
|
||||
part = req->part;
|
||||
|
||||
part_stat_inc(cpu, part, ios[rw]);
|
||||
part_stat_add(cpu, part, ticks[rw], duration);
|
||||
|
@ -2579,6 +2605,13 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
|
|||
}
|
||||
EXPORT_SYMBOL(kblockd_schedule_work);
|
||||
|
||||
int kblockd_schedule_delayed_work(struct request_queue *q,
|
||||
struct delayed_work *dwork, unsigned long delay)
|
||||
{
|
||||
return queue_delayed_work(kblockd_workqueue, dwork, delay);
|
||||
}
|
||||
EXPORT_SYMBOL(kblockd_schedule_delayed_work);
|
||||
|
||||
int __init blk_dev_init(void)
|
||||
{
|
||||
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
|
||||
|
|
|
@ -80,6 +80,7 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
|
|||
DECLARE_COMPLETION_ONSTACK(wait);
|
||||
char sense[SCSI_SENSE_BUFFERSIZE];
|
||||
int err = 0;
|
||||
unsigned long hang_check;
|
||||
|
||||
/*
|
||||
* we need an extra reference to the request, so we can look at
|
||||
|
@ -95,7 +96,13 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
|
|||
|
||||
rq->end_io_data = &wait;
|
||||
blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
|
||||
wait_for_completion(&wait);
|
||||
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
hang_check = sysctl_hung_task_timeout_secs;
|
||||
if (hang_check)
|
||||
while (!wait_for_completion_timeout(&wait, hang_check * (HZ/2)));
|
||||
else
|
||||
wait_for_completion(&wait);
|
||||
|
||||
if (rq->errors)
|
||||
err = -EIO;
|
||||
|
|
|
@ -32,24 +32,37 @@ static struct kmem_cache *integrity_cachep;
|
|||
|
||||
/**
|
||||
* blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
|
||||
* @rq: request with integrity metadata attached
|
||||
* @q: request queue
|
||||
* @bio: bio with integrity metadata attached
|
||||
*
|
||||
* Description: Returns the number of elements required in a
|
||||
* scatterlist corresponding to the integrity metadata in a request.
|
||||
* scatterlist corresponding to the integrity metadata in a bio.
|
||||
*/
|
||||
int blk_rq_count_integrity_sg(struct request *rq)
|
||||
int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct bio_vec *iv, *ivprv;
|
||||
struct req_iterator iter;
|
||||
unsigned int segments;
|
||||
struct bio_vec *iv, *ivprv = NULL;
|
||||
unsigned int segments = 0;
|
||||
unsigned int seg_size = 0;
|
||||
unsigned int i = 0;
|
||||
|
||||
ivprv = NULL;
|
||||
segments = 0;
|
||||
bio_for_each_integrity_vec(iv, bio, i) {
|
||||
|
||||
rq_for_each_integrity_segment(iv, rq, iter) {
|
||||
if (ivprv) {
|
||||
if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
|
||||
goto new_segment;
|
||||
|
||||
if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
|
||||
if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv))
|
||||
goto new_segment;
|
||||
|
||||
if (seg_size + iv->bv_len > queue_max_segment_size(q))
|
||||
goto new_segment;
|
||||
|
||||
seg_size += iv->bv_len;
|
||||
} else {
|
||||
new_segment:
|
||||
segments++;
|
||||
seg_size = iv->bv_len;
|
||||
}
|
||||
|
||||
ivprv = iv;
|
||||
}
|
||||
|
@ -60,30 +73,34 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg);
|
|||
|
||||
/**
|
||||
* blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
|
||||
* @rq: request with integrity metadata attached
|
||||
* @q: request queue
|
||||
* @bio: bio with integrity metadata attached
|
||||
* @sglist: target scatterlist
|
||||
*
|
||||
* Description: Map the integrity vectors in request into a
|
||||
* scatterlist. The scatterlist must be big enough to hold all
|
||||
* elements. I.e. sized using blk_rq_count_integrity_sg().
|
||||
*/
|
||||
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
|
||||
int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
|
||||
struct scatterlist *sglist)
|
||||
{
|
||||
struct bio_vec *iv, *ivprv;
|
||||
struct req_iterator iter;
|
||||
struct scatterlist *sg;
|
||||
unsigned int segments;
|
||||
struct bio_vec *iv, *ivprv = NULL;
|
||||
struct scatterlist *sg = NULL;
|
||||
unsigned int segments = 0;
|
||||
unsigned int i = 0;
|
||||
|
||||
ivprv = NULL;
|
||||
sg = NULL;
|
||||
segments = 0;
|
||||
|
||||
rq_for_each_integrity_segment(iv, rq, iter) {
|
||||
bio_for_each_integrity_vec(iv, bio, i) {
|
||||
|
||||
if (ivprv) {
|
||||
if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
|
||||
goto new_segment;
|
||||
|
||||
if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv))
|
||||
goto new_segment;
|
||||
|
||||
if (sg->length + iv->bv_len > queue_max_segment_size(q))
|
||||
goto new_segment;
|
||||
|
||||
sg->length += iv->bv_len;
|
||||
} else {
|
||||
new_segment:
|
||||
|
@ -162,6 +179,40 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_integrity_compare);
|
||||
|
||||
int blk_integrity_merge_rq(struct request_queue *q, struct request *req,
|
||||
struct request *next)
|
||||
{
|
||||
if (blk_integrity_rq(req) != blk_integrity_rq(next))
|
||||
return -1;
|
||||
|
||||
if (req->nr_integrity_segments + next->nr_integrity_segments >
|
||||
q->limits.max_integrity_segments)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_integrity_merge_rq);
|
||||
|
||||
int blk_integrity_merge_bio(struct request_queue *q, struct request *req,
|
||||
struct bio *bio)
|
||||
{
|
||||
int nr_integrity_segs;
|
||||
struct bio *next = bio->bi_next;
|
||||
|
||||
bio->bi_next = NULL;
|
||||
nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
|
||||
bio->bi_next = next;
|
||||
|
||||
if (req->nr_integrity_segments + nr_integrity_segs >
|
||||
q->limits.max_integrity_segments)
|
||||
return -1;
|
||||
|
||||
req->nr_integrity_segments += nr_integrity_segs;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_integrity_merge_bio);
|
||||
|
||||
struct integrity_sysfs_entry {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct blk_integrity *, char *);
|
||||
|
@ -381,7 +432,6 @@ void blk_integrity_unregister(struct gendisk *disk)
|
|||
kobject_uevent(&bi->kobj, KOBJ_REMOVE);
|
||||
kobject_del(&bi->kobj);
|
||||
kobject_put(&bi->kobj);
|
||||
kmem_cache_free(integrity_cachep, bi);
|
||||
disk->integrity = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_integrity_unregister);
|
||||
|
|
|
@ -54,7 +54,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
|
|||
* direct dma. else, set up kernel bounce buffers
|
||||
*/
|
||||
uaddr = (unsigned long) ubuf;
|
||||
if (blk_rq_aligned(q, ubuf, len) && !map_data)
|
||||
if (blk_rq_aligned(q, uaddr, len) && !map_data)
|
||||
bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
|
||||
else
|
||||
bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
|
||||
|
@ -288,6 +288,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
|||
unsigned int len, gfp_t gfp_mask)
|
||||
{
|
||||
int reading = rq_data_dir(rq) == READ;
|
||||
unsigned long addr = (unsigned long) kbuf;
|
||||
int do_copy = 0;
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
@ -297,7 +298,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
|||
if (!len || !kbuf)
|
||||
return -EINVAL;
|
||||
|
||||
do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);
|
||||
do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
|
||||
if (do_copy)
|
||||
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
|
||||
else
|
||||
|
|
|
@ -205,12 +205,11 @@ static inline int ll_new_hw_segment(struct request_queue *q,
|
|||
{
|
||||
int nr_phys_segs = bio_phys_segments(q, bio);
|
||||
|
||||
if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) {
|
||||
req->cmd_flags |= REQ_NOMERGE;
|
||||
if (req == q->last_merge)
|
||||
q->last_merge = NULL;
|
||||
return 0;
|
||||
}
|
||||
if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
|
||||
goto no_merge;
|
||||
|
||||
if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio))
|
||||
goto no_merge;
|
||||
|
||||
/*
|
||||
* This will form the start of a new hw segment. Bump both
|
||||
|
@ -218,6 +217,12 @@ static inline int ll_new_hw_segment(struct request_queue *q,
|
|||
*/
|
||||
req->nr_phys_segments += nr_phys_segs;
|
||||
return 1;
|
||||
|
||||
no_merge:
|
||||
req->cmd_flags |= REQ_NOMERGE;
|
||||
if (req == q->last_merge)
|
||||
q->last_merge = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ll_back_merge_fn(struct request_queue *q, struct request *req,
|
||||
|
@ -301,6 +306,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
|
|||
if (total_phys_segments > queue_max_segments(q))
|
||||
return 0;
|
||||
|
||||
if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next))
|
||||
return 0;
|
||||
|
||||
/* Merge is OK... */
|
||||
req->nr_phys_segments = total_phys_segments;
|
||||
return 1;
|
||||
|
@ -343,7 +351,7 @@ static void blk_account_io_merge(struct request *req)
|
|||
int cpu;
|
||||
|
||||
cpu = part_stat_lock();
|
||||
part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
|
||||
part = req->part;
|
||||
|
||||
part_round_stats(cpu, part);
|
||||
part_dec_in_flight(part, rq_data_dir(req));
|
||||
|
@ -384,9 +392,6 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|
|||
|| next->special)
|
||||
return 0;
|
||||
|
||||
if (blk_integrity_rq(req) != blk_integrity_rq(next))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If we are allowed to merge, then append bio list
|
||||
* from next to rq and release next. merge_requests_fn
|
||||
|
|
|
@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
|
|||
void blk_set_default_limits(struct queue_limits *lim)
|
||||
{
|
||||
lim->max_segments = BLK_MAX_SEGMENTS;
|
||||
lim->max_integrity_segments = 0;
|
||||
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
|
||||
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
|
||||
lim->max_sectors = BLK_DEF_MAX_SECTORS;
|
||||
|
@ -213,7 +214,7 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
|
|||
*/
|
||||
if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
|
||||
dma = 1;
|
||||
q->limits.bounce_pfn = max_low_pfn;
|
||||
q->limits.bounce_pfn = max(max_low_pfn, b_pfn);
|
||||
#else
|
||||
if (b_pfn < blk_max_low_pfn)
|
||||
dma = 1;
|
||||
|
@ -343,7 +344,7 @@ EXPORT_SYMBOL(blk_queue_logical_block_size);
|
|||
* hardware can operate on without reverting to read-modify-write
|
||||
* operations.
|
||||
*/
|
||||
void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
|
||||
void blk_queue_physical_block_size(struct request_queue *q, unsigned int size)
|
||||
{
|
||||
q->limits.physical_block_size = size;
|
||||
|
||||
|
@ -455,11 +456,6 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_queue_io_opt);
|
||||
|
||||
/*
|
||||
* Returns the minimum that is _not_ zero, unless both are zero.
|
||||
*/
|
||||
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
|
||||
|
||||
/**
|
||||
* blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
|
||||
* @t: the stacking driver (top)
|
||||
|
@ -514,6 +510,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
|||
b->seg_boundary_mask);
|
||||
|
||||
t->max_segments = min_not_zero(t->max_segments, b->max_segments);
|
||||
t->max_integrity_segments = min_not_zero(t->max_integrity_segments,
|
||||
b->max_integrity_segments);
|
||||
|
||||
t->max_segment_size = min_not_zero(t->max_segment_size,
|
||||
b->max_segment_size);
|
||||
|
|
|
@ -112,6 +112,11 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
|
|||
return queue_var_show(queue_max_segments(q), (page));
|
||||
}
|
||||
|
||||
static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(q->limits.max_integrity_segments, (page));
|
||||
}
|
||||
|
||||
static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
|
||||
{
|
||||
if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
|
||||
|
@ -288,6 +293,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = {
|
|||
.show = queue_max_segments_show,
|
||||
};
|
||||
|
||||
static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
|
||||
.attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
|
||||
.show = queue_max_integrity_segments_show,
|
||||
};
|
||||
|
||||
static struct queue_sysfs_entry queue_max_segment_size_entry = {
|
||||
.attr = {.name = "max_segment_size", .mode = S_IRUGO },
|
||||
.show = queue_max_segment_size_show,
|
||||
|
@ -375,6 +385,7 @@ static struct attribute *default_attrs[] = {
|
|||
&queue_max_hw_sectors_entry.attr,
|
||||
&queue_max_sectors_entry.attr,
|
||||
&queue_max_segments_entry.attr,
|
||||
&queue_max_integrity_segments_entry.attr,
|
||||
&queue_max_segment_size_entry.attr,
|
||||
&queue_iosched_entry.attr,
|
||||
&queue_hw_sector_size_entry.attr,
|
||||
|
|
1123
block/blk-throttle.c
Normal file
1123
block/blk-throttle.c
Normal file
File diff suppressed because it is too large
Load diff
12
block/blk.h
12
block/blk.h
|
@ -110,10 +110,6 @@ void blk_queue_congestion_threshold(struct request_queue *q);
|
|||
|
||||
int blk_dev_init(void);
|
||||
|
||||
void elv_quiesce_start(struct request_queue *q);
|
||||
void elv_quiesce_end(struct request_queue *q);
|
||||
|
||||
|
||||
/*
|
||||
* Return the threshold (number of used requests) at which the queue is
|
||||
* considered to be congested. It includes a little hysteresis to keep the
|
||||
|
@ -132,14 +128,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
|
|||
return q->nr_congestion_off;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
||||
|
||||
#define rq_for_each_integrity_segment(bvl, _rq, _iter) \
|
||||
__rq_for_each_bio(_iter.bio, _rq) \
|
||||
bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i)
|
||||
|
||||
#endif /* BLK_DEV_INTEGRITY */
|
||||
|
||||
static inline int blk_cpu_to_group(int cpu)
|
||||
{
|
||||
int group = NR_CPUS;
|
||||
|
|
|
@ -160,6 +160,7 @@ enum wl_prio_t {
|
|||
BE_WORKLOAD = 0,
|
||||
RT_WORKLOAD = 1,
|
||||
IDLE_WORKLOAD = 2,
|
||||
CFQ_PRIO_NR,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -184,10 +185,19 @@ struct cfq_group {
|
|||
/* number of cfqq currently on this group */
|
||||
int nr_cfqq;
|
||||
|
||||
/* Per group busy queus average. Useful for workload slice calc. */
|
||||
unsigned int busy_queues_avg[2];
|
||||
/*
|
||||
* rr lists of queues with requests, onle rr for each priority class.
|
||||
* Per group busy queues average. Useful for workload slice calc. We
|
||||
* create the array for each prio class but at run time it is used
|
||||
* only for RT and BE class and slot for IDLE class remains unused.
|
||||
* This is primarily done to avoid confusion and a gcc warning.
|
||||
*/
|
||||
unsigned int busy_queues_avg[CFQ_PRIO_NR];
|
||||
/*
|
||||
* rr lists of queues with requests. We maintain service trees for
|
||||
* RT and BE classes. These trees are subdivided in subclasses
|
||||
* of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE
|
||||
* class there is no subclassification and all the cfq queues go on
|
||||
* a single tree service_tree_idle.
|
||||
* Counts are embedded in the cfq_rb_root
|
||||
*/
|
||||
struct cfq_rb_root service_trees[2][3];
|
||||
|
@ -221,7 +231,6 @@ struct cfq_data {
|
|||
enum wl_type_t serving_type;
|
||||
unsigned long workload_expires;
|
||||
struct cfq_group *serving_group;
|
||||
bool noidle_tree_requires_idle;
|
||||
|
||||
/*
|
||||
* Each priority tree is sorted by next_request position. These
|
||||
|
@ -977,8 +986,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight)
|
||||
void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
|
||||
unsigned int weight)
|
||||
{
|
||||
cfqg_of_blkg(blkg)->weight = weight;
|
||||
}
|
||||
|
@ -2180,7 +2189,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
|
|||
slice = max_t(unsigned, slice, CFQ_MIN_TT);
|
||||
cfq_log(cfqd, "workload slice:%d", slice);
|
||||
cfqd->workload_expires = jiffies + slice;
|
||||
cfqd->noidle_tree_requires_idle = false;
|
||||
}
|
||||
|
||||
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
|
||||
|
@ -3177,7 +3185,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
|
|||
if (cfqq->queued[0] + cfqq->queued[1] >= 4)
|
||||
cfq_mark_cfqq_deep(cfqq);
|
||||
|
||||
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
|
||||
if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
|
||||
enable_idle = 0;
|
||||
else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
|
||||
(!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
|
||||
enable_idle = 0;
|
||||
else if (sample_valid(cic->ttime_samples)) {
|
||||
|
@ -3494,17 +3504,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
|
|||
cfq_slice_expired(cfqd, 1);
|
||||
else if (sync && cfqq_empty &&
|
||||
!cfq_close_cooperator(cfqd, cfqq)) {
|
||||
cfqd->noidle_tree_requires_idle |=
|
||||
!(rq->cmd_flags & REQ_NOIDLE);
|
||||
/*
|
||||
* Idling is enabled for SYNC_WORKLOAD.
|
||||
* SYNC_NOIDLE_WORKLOAD idles at the end of the tree
|
||||
* only if we processed at least one !REQ_NOIDLE request
|
||||
*/
|
||||
if (cfqd->serving_type == SYNC_WORKLOAD
|
||||
|| cfqd->noidle_tree_requires_idle
|
||||
|| cfqq->cfqg->nr_cfqq == 1)
|
||||
cfq_arm_slice_timer(cfqd);
|
||||
cfq_arm_slice_timer(cfqd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4090,6 +4090,7 @@ static struct blkio_policy_type blkio_policy_cfq = {
|
|||
.blkio_unlink_group_fn = cfq_unlink_blkio_group,
|
||||
.blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
|
||||
},
|
||||
.plid = BLKIO_POLICY_PROP,
|
||||
};
|
||||
#else
|
||||
static struct blkio_policy_type blkio_policy_cfq;
|
||||
|
|
|
@ -69,7 +69,7 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
|
|||
|
||||
static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
|
||||
struct blkio_group *blkg, void *key, dev_t dev) {
|
||||
blkiocg_add_blkio_group(blkcg, blkg, key, dev);
|
||||
blkiocg_add_blkio_group(blkcg, blkg, key, dev, BLKIO_POLICY_PROP);
|
||||
}
|
||||
|
||||
static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
|
||||
|
|
|
@ -541,13 +541,15 @@ void add_disk(struct gendisk *disk)
|
|||
disk->major = MAJOR(devt);
|
||||
disk->first_minor = MINOR(devt);
|
||||
|
||||
/* Register BDI before referencing it from bdev */
|
||||
bdi = &disk->queue->backing_dev_info;
|
||||
bdi_register_dev(bdi, disk_devt(disk));
|
||||
|
||||
blk_register_region(disk_devt(disk), disk->minors, NULL,
|
||||
exact_match, exact_lock, disk);
|
||||
register_disk(disk);
|
||||
blk_register_queue(disk);
|
||||
|
||||
bdi = &disk->queue->backing_dev_info;
|
||||
bdi_register_dev(bdi, disk_devt(disk));
|
||||
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
|
||||
"bdi");
|
||||
WARN_ON(retval);
|
||||
|
@ -642,6 +644,7 @@ void __init printk_all_partitions(void)
|
|||
struct hd_struct *part;
|
||||
char name_buf[BDEVNAME_SIZE];
|
||||
char devt_buf[BDEVT_SIZE];
|
||||
u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1];
|
||||
|
||||
/*
|
||||
* Don't show empty devices or things that have been
|
||||
|
@ -660,10 +663,14 @@ void __init printk_all_partitions(void)
|
|||
while ((part = disk_part_iter_next(&piter))) {
|
||||
bool is_part0 = part == &disk->part0;
|
||||
|
||||
printk("%s%s %10llu %s", is_part0 ? "" : " ",
|
||||
uuid[0] = 0;
|
||||
if (part->info)
|
||||
part_unpack_uuid(part->info->uuid, uuid);
|
||||
|
||||
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
|
||||
bdevt_str(part_devt(part), devt_buf),
|
||||
(unsigned long long)part->nr_sects >> 1,
|
||||
disk_name(disk, part->partno, name_buf));
|
||||
disk_name(disk, part->partno, name_buf), uuid);
|
||||
if (is_part0) {
|
||||
if (disk->driverfs_dev != NULL &&
|
||||
disk->driverfs_dev->driver != NULL)
|
||||
|
@ -925,8 +932,15 @@ static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
|
|||
{
|
||||
struct disk_part_tbl *ptbl =
|
||||
container_of(head, struct disk_part_tbl, rcu_head);
|
||||
struct gendisk *disk = ptbl->disk;
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned long flags;
|
||||
|
||||
kfree(ptbl);
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
elv_quiesce_end(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -944,11 +958,17 @@ static void disk_replace_part_tbl(struct gendisk *disk,
|
|||
struct disk_part_tbl *new_ptbl)
|
||||
{
|
||||
struct disk_part_tbl *old_ptbl = disk->part_tbl;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
rcu_assign_pointer(disk->part_tbl, new_ptbl);
|
||||
|
||||
if (old_ptbl) {
|
||||
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
elv_quiesce_start(q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
|
||||
}
|
||||
}
|
||||
|
@ -989,6 +1009,7 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
|
|||
return -ENOMEM;
|
||||
|
||||
new_ptbl->len = target;
|
||||
new_ptbl->disk = disk;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
|
||||
|
@ -1004,6 +1025,7 @@ static void disk_release(struct device *dev)
|
|||
kfree(disk->random);
|
||||
disk_replace_part_tbl(disk, NULL);
|
||||
free_part_stats(&disk->part0);
|
||||
free_part_info(&disk->part0);
|
||||
kfree(disk);
|
||||
}
|
||||
struct class block_class = {
|
||||
|
|
|
@ -62,7 +62,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|
|||
|
||||
/* all seems OK */
|
||||
part = add_partition(disk, partno, start, length,
|
||||
ADDPART_FLAG_NONE);
|
||||
ADDPART_FLAG_NONE, NULL);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
return IS_ERR(part) ? PTR_ERR(part) : 0;
|
||||
case BLKPG_DEL_PARTITION:
|
||||
|
|
|
@ -2972,7 +2972,6 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
|
|||
* we still need to figure out whether we accept that. */
|
||||
mdev->p_size = p_size;
|
||||
|
||||
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
|
||||
if (get_ldev(mdev)) {
|
||||
warn_if_differ_considerably(mdev, "lower level device sizes",
|
||||
p_size, drbd_get_max_capacity(mdev->ldev));
|
||||
|
|
|
@ -706,8 +706,6 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
|
|||
return 0;
|
||||
}
|
||||
|
||||
#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
|
||||
|
||||
/*
|
||||
* Return a minimum chunk size of all snapshots that have the specified origin.
|
||||
* Return zero if the origin has no snapshots.
|
||||
|
|
|
@ -486,11 +486,6 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the minimum that is _not_ zero, unless both are zero.
|
||||
*/
|
||||
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
|
||||
|
||||
int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
|
|
|
@ -681,6 +681,7 @@ void zfcp_scsi_set_prot(struct zfcp_adapter *adapter)
|
|||
adapter->adapter_features & FSF_FEATURE_DIX_PROT_TCPIP) {
|
||||
mask |= SHOST_DIX_TYPE1_PROTECTION;
|
||||
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
|
||||
shost->sg_prot_tablesize = ZFCP_QDIO_MAX_SBALES_PER_REQ / 2;
|
||||
shost->sg_tablesize = ZFCP_QDIO_MAX_SBALES_PER_REQ / 2;
|
||||
shost->max_sectors = ZFCP_QDIO_MAX_SBALES_PER_REQ * 8 / 2;
|
||||
}
|
||||
|
|
|
@ -376,6 +376,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
|
|||
shost->this_id = sht->this_id;
|
||||
shost->can_queue = sht->can_queue;
|
||||
shost->sg_tablesize = sht->sg_tablesize;
|
||||
shost->sg_prot_tablesize = sht->sg_prot_tablesize;
|
||||
shost->cmd_per_lun = sht->cmd_per_lun;
|
||||
shost->unchecked_isa_dma = sht->unchecked_isa_dma;
|
||||
shost->use_clustering = sht->use_clustering;
|
||||
|
|
|
@ -968,11 +968,13 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
|
|||
*/
|
||||
int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
|
||||
{
|
||||
int error = scsi_init_sgtable(cmd->request, &cmd->sdb, gfp_mask);
|
||||
struct request *rq = cmd->request;
|
||||
|
||||
int error = scsi_init_sgtable(rq, &cmd->sdb, gfp_mask);
|
||||
if (error)
|
||||
goto err_exit;
|
||||
|
||||
if (blk_bidi_rq(cmd->request)) {
|
||||
if (blk_bidi_rq(rq)) {
|
||||
struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc(
|
||||
scsi_sdb_cache, GFP_ATOMIC);
|
||||
if (!bidi_sdb) {
|
||||
|
@ -980,28 +982,28 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
|
|||
goto err_exit;
|
||||
}
|
||||
|
||||
cmd->request->next_rq->special = bidi_sdb;
|
||||
error = scsi_init_sgtable(cmd->request->next_rq, bidi_sdb,
|
||||
GFP_ATOMIC);
|
||||
rq->next_rq->special = bidi_sdb;
|
||||
error = scsi_init_sgtable(rq->next_rq, bidi_sdb, GFP_ATOMIC);
|
||||
if (error)
|
||||
goto err_exit;
|
||||
}
|
||||
|
||||
if (blk_integrity_rq(cmd->request)) {
|
||||
if (blk_integrity_rq(rq)) {
|
||||
struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
|
||||
int ivecs, count;
|
||||
|
||||
BUG_ON(prot_sdb == NULL);
|
||||
ivecs = blk_rq_count_integrity_sg(cmd->request);
|
||||
ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);
|
||||
|
||||
if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {
|
||||
error = BLKPREP_DEFER;
|
||||
goto err_exit;
|
||||
}
|
||||
|
||||
count = blk_rq_map_integrity_sg(cmd->request,
|
||||
count = blk_rq_map_integrity_sg(rq->q, rq->bio,
|
||||
prot_sdb->table.sgl);
|
||||
BUG_ON(unlikely(count > ivecs));
|
||||
BUG_ON(unlikely(count > queue_max_integrity_segments(rq->q)));
|
||||
|
||||
cmd->prot_sdb = prot_sdb;
|
||||
cmd->prot_sdb->table.nents = count;
|
||||
|
@ -1625,6 +1627,14 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
|
|||
blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize,
|
||||
SCSI_MAX_SG_CHAIN_SEGMENTS));
|
||||
|
||||
if (scsi_host_prot_dma(shost)) {
|
||||
shost->sg_prot_tablesize =
|
||||
min_not_zero(shost->sg_prot_tablesize,
|
||||
(unsigned short)SCSI_MAX_PROT_SG_SEGMENTS);
|
||||
BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize);
|
||||
blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
|
||||
}
|
||||
|
||||
blk_queue_max_hw_sectors(q, shost->max_sectors);
|
||||
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
|
||||
blk_queue_segment_boundary(q, shost->dma_boundary);
|
||||
|
|
|
@ -251,6 +251,7 @@ shost_rd_attr(host_busy, "%hu\n");
|
|||
shost_rd_attr(cmd_per_lun, "%hd\n");
|
||||
shost_rd_attr(can_queue, "%hd\n");
|
||||
shost_rd_attr(sg_tablesize, "%hu\n");
|
||||
shost_rd_attr(sg_prot_tablesize, "%hu\n");
|
||||
shost_rd_attr(unchecked_isa_dma, "%d\n");
|
||||
shost_rd_attr(prot_capabilities, "%u\n");
|
||||
shost_rd_attr(prot_guard_type, "%hd\n");
|
||||
|
@ -262,6 +263,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = {
|
|||
&dev_attr_cmd_per_lun.attr,
|
||||
&dev_attr_can_queue.attr,
|
||||
&dev_attr_sg_tablesize.attr,
|
||||
&dev_attr_sg_prot_tablesize.attr,
|
||||
&dev_attr_unchecked_isa_dma.attr,
|
||||
&dev_attr_proc_name.attr,
|
||||
&dev_attr_scan.attr,
|
||||
|
|
|
@ -375,21 +375,20 @@ int sd_dif_prepare(struct request *rq, sector_t hw_sector, unsigned int sector_s
|
|||
unsigned int i, j;
|
||||
u32 phys, virt;
|
||||
|
||||
/* Already remapped? */
|
||||
if (rq->cmd_flags & REQ_INTEGRITY)
|
||||
return 0;
|
||||
|
||||
sdkp = rq->bio->bi_bdev->bd_disk->private_data;
|
||||
|
||||
if (sdkp->protection_type == SD_DIF_TYPE3_PROTECTION)
|
||||
return 0;
|
||||
|
||||
rq->cmd_flags |= REQ_INTEGRITY;
|
||||
phys = hw_sector & 0xffffffff;
|
||||
|
||||
__rq_for_each_bio(bio, rq) {
|
||||
struct bio_vec *iv;
|
||||
|
||||
/* Already remapped? */
|
||||
if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
|
||||
break;
|
||||
|
||||
virt = bio->bi_integrity->bip_sector & 0xffffffff;
|
||||
|
||||
bip_for_each_vec(iv, bio->bi_integrity, i) {
|
||||
|
@ -408,6 +407,8 @@ int sd_dif_prepare(struct request *rq, sector_t hw_sector, unsigned int sector_s
|
|||
|
||||
kunmap_atomic(sdt, KM_USER0);
|
||||
}
|
||||
|
||||
/*
 * BIO_MAPPED_INTEGRITY is a bit index, and bio_flagged() tests
 * (1 << flag); set the matching mask so the "already remapped"
 * check at the top of the loop can actually see it.
 */
bio->bi_flags |= (1 << BIO_MAPPED_INTEGRITY);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -1660,7 +1660,7 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd)
|
|||
if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO &&
|
||||
dxfer_dir != SG_DXFER_UNKNOWN && !iov_count &&
|
||||
!sfp->parentdp->device->host->unchecked_isa_dma &&
|
||||
blk_rq_aligned(q, hp->dxferp, dxfer_len))
|
||||
blk_rq_aligned(q, (unsigned long)hp->dxferp, dxfer_len))
|
||||
md = NULL;
|
||||
else
|
||||
md = &map_data;
|
||||
|
|
|
@ -318,7 +318,7 @@ void journal_commit_transaction(journal_t *journal)
|
|||
int first_tag = 0;
|
||||
int tag_flag;
|
||||
int i;
|
||||
int write_op = WRITE;
|
||||
int write_op = WRITE_SYNC;
|
||||
|
||||
/*
|
||||
* First job: lock down the current transaction and wait for
|
||||
|
|
|
@ -360,7 +360,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
|||
int tag_bytes = journal_tag_bytes(journal);
|
||||
struct buffer_head *cbh = NULL; /* For transactional checksums */
|
||||
__u32 crc32_sum = ~0;
|
||||
int write_op = WRITE;
|
||||
int write_op = WRITE_SYNC;
|
||||
|
||||
/*
|
||||
* First job: lock down the current transaction and wait for
|
||||
|
|
|
@ -352,6 +352,7 @@ static void part_release(struct device *dev)
|
|||
{
|
||||
struct hd_struct *p = dev_to_part(dev);
|
||||
free_part_stats(p);
|
||||
free_part_info(p);
|
||||
kfree(p);
|
||||
}
|
||||
|
||||
|
@ -364,17 +365,25 @@ struct device_type part_type = {
|
|||
static void delete_partition_rcu_cb(struct rcu_head *head)
|
||||
{
|
||||
struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
|
||||
struct gendisk *disk = part_to_disk(part);
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned long flags;
|
||||
|
||||
part->start_sect = 0;
|
||||
part->nr_sects = 0;
|
||||
part_stat_set_all(part, 0);
|
||||
put_device(part_to_dev(part));
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
elv_quiesce_end(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
|
||||
void delete_partition(struct gendisk *disk, int partno)
|
||||
{
|
||||
struct disk_part_tbl *ptbl = disk->part_tbl;
|
||||
struct hd_struct *part;
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
if (partno >= ptbl->len)
|
||||
return;
|
||||
|
@ -389,6 +398,10 @@ void delete_partition(struct gendisk *disk, int partno)
|
|||
kobject_put(part->holder_dir);
|
||||
device_del(part_to_dev(part));
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
elv_quiesce_start(q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
call_rcu(&part->rcu_head, delete_partition_rcu_cb);
|
||||
}
|
||||
|
||||
|
@ -401,7 +414,8 @@ static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
|
|||
whole_disk_show, NULL);
|
||||
|
||||
struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
||||
sector_t start, sector_t len, int flags)
|
||||
sector_t start, sector_t len, int flags,
|
||||
struct partition_meta_info *info)
|
||||
{
|
||||
struct hd_struct *p;
|
||||
dev_t devt = MKDEV(0, 0);
|
||||
|
@ -438,6 +452,14 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
|||
p->partno = partno;
|
||||
p->policy = get_disk_ro(disk);
|
||||
|
||||
if (info) {
|
||||
struct partition_meta_info *pinfo = alloc_part_info(disk);
|
||||
if (!pinfo)
|
||||
goto out_free_stats;
|
||||
memcpy(pinfo, info, sizeof(*info));
|
||||
p->info = pinfo;
|
||||
}
|
||||
|
||||
dname = dev_name(ddev);
|
||||
if (isdigit(dname[strlen(dname) - 1]))
|
||||
dev_set_name(pdev, "%sp%d", dname, partno);
|
||||
|
@ -451,7 +473,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
|||
|
||||
err = blk_alloc_devt(p, &devt);
|
||||
if (err)
|
||||
goto out_free_stats;
|
||||
goto out_free_info;
|
||||
pdev->devt = devt;
|
||||
|
||||
/* delay uevent until 'holders' subdir is created */
|
||||
|
@ -481,6 +503,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
|||
|
||||
return p;
|
||||
|
||||
out_free_info:
|
||||
free_part_info(p);
|
||||
out_free_stats:
|
||||
free_part_stats(p);
|
||||
out_free:
|
||||
|
@ -642,6 +666,7 @@ rescan:
|
|||
/* add partitions */
|
||||
for (p = 1; p < state->limit; p++) {
|
||||
sector_t size, from;
|
||||
struct partition_meta_info *info = NULL;
|
||||
|
||||
size = state->parts[p].size;
|
||||
if (!size)
|
||||
|
@ -675,8 +700,12 @@ rescan:
|
|||
size = get_capacity(disk) - from;
|
||||
}
|
||||
}
|
||||
|
||||
if (state->parts[p].has_info)
|
||||
info = &state->parts[p].info;
|
||||
part = add_partition(disk, p, from, size,
|
||||
state->parts[p].flags);
|
||||
state->parts[p].flags,
|
||||
&state->parts[p].info);
|
||||
if (IS_ERR(part)) {
|
||||
printk(KERN_ERR " %s: p%d could not be added: %ld\n",
|
||||
disk->disk_name, p, -PTR_ERR(part));
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include <linux/pagemap.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/genhd.h>
|
||||
|
||||
/*
|
||||
* add_gd_partition adds a partitions details to the devices partition
|
||||
|
@ -12,6 +13,8 @@ struct parsed_partitions {
|
|||
sector_t from;
|
||||
sector_t size;
|
||||
int flags;
|
||||
bool has_info;
|
||||
struct partition_meta_info info;
|
||||
} parts[DISK_MAX_PARTS];
|
||||
int next;
|
||||
int limit;
|
||||
|
|
|
@ -94,6 +94,7 @@
|
|||
*
|
||||
************************************************************/
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/slab.h>
|
||||
#include "check.h"
|
||||
|
@ -604,6 +605,7 @@ int efi_partition(struct parsed_partitions *state)
|
|||
gpt_entry *ptes = NULL;
|
||||
u32 i;
|
||||
unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
|
||||
u8 unparsed_guid[37];
|
||||
|
||||
if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
|
||||
kfree(gpt);
|
||||
|
@ -614,6 +616,9 @@ int efi_partition(struct parsed_partitions *state)
|
|||
pr_debug("GUID Partition Table is valid! Yea!\n");
|
||||
|
||||
for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
|
||||
struct partition_meta_info *info;
|
||||
unsigned label_count = 0;
|
||||
unsigned label_max;
|
||||
u64 start = le64_to_cpu(ptes[i].starting_lba);
|
||||
u64 size = le64_to_cpu(ptes[i].ending_lba) -
|
||||
le64_to_cpu(ptes[i].starting_lba) + 1ULL;
|
||||
|
@ -627,6 +632,26 @@ int efi_partition(struct parsed_partitions *state)
|
|||
if (!efi_guidcmp(ptes[i].partition_type_guid,
|
||||
PARTITION_LINUX_RAID_GUID))
|
||||
state->parts[i + 1].flags = ADDPART_FLAG_RAID;
|
||||
|
||||
info = &state->parts[i + 1].info;
|
||||
/* Instead of doing a manual swap to big endian, reuse the
|
||||
* common ASCII hex format as the interim.
|
||||
*/
|
||||
efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
|
||||
part_pack_uuid(unparsed_guid, info->uuid);
|
||||
|
||||
/* Naively convert UTF16-LE to 7 bits. */
|
||||
label_max = min(sizeof(info->volname) - 1,
|
||||
/*
 * Bound by the number of name elements, not bytes: the loop
 * below indexes partition_name[] one element per character.
 */
sizeof(ptes[i].partition_name) /
sizeof(ptes[i].partition_name[0]));
|
||||
info->volname[label_max] = 0;
|
||||
while (label_count < label_max) {
|
||||
u8 c = ptes[i].partition_name[label_count] & 0xff;
|
||||
if (c && !isprint(c))
|
||||
c = '!';
|
||||
info->volname[label_count] = c;
|
||||
label_count++;
|
||||
}
|
||||
state->parts[i + 1].has_info = true;
|
||||
}
|
||||
kfree(ptes);
|
||||
kfree(gpt);
|
||||
|
|
|
@ -346,8 +346,15 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
|
|||
}
|
||||
|
||||
#else
|
||||
#define bvec_kmap_irq(bvec, flags) (page_address((bvec)->bv_page) + (bvec)->bv_offset)
|
||||
#define bvec_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0)
|
||||
static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
|
||||
{
|
||||
return page_address(bvec->bv_page) + bvec->bv_offset;
|
||||
}
|
||||
|
||||
static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
|
||||
{
|
||||
*flags = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
|
||||
|
@ -496,6 +503,10 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
|
|||
#define bip_for_each_vec(bvl, bip, i) \
|
||||
__bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
|
||||
|
||||
#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \
|
||||
for_each_bio(_bio) \
|
||||
bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
|
||||
|
||||
/* Non-zero when @bio has an integrity payload attached (bi_integrity != NULL). */
#define bio_integrity(bio) ((bio)->bi_integrity != NULL)
|
||||
|
||||
extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
|
||||
|
|
|
@ -97,6 +97,7 @@ struct bio {
|
|||
#define BIO_NULL_MAPPED 9 /* contains invalid user pages */
|
||||
#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
|
||||
#define BIO_QUIET 11 /* Make BIO Quiet */
|
||||
#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */
|
||||
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
|
||||
|
||||
/*
|
||||
|
@ -130,6 +131,8 @@ enum rq_flag_bits {
|
|||
/* bio only flags */
|
||||
__REQ_UNPLUG, /* unplug the immediately after submission */
|
||||
__REQ_RAHEAD, /* read ahead, can fail anytime */
|
||||
__REQ_THROTTLED, /* This bio has already been subjected to
|
||||
* throttling rules. Don't do it again. */
|
||||
|
||||
/* request only flags */
|
||||
__REQ_SORTED, /* elevator knows about this request */
|
||||
|
@ -146,7 +149,6 @@ enum rq_flag_bits {
|
|||
__REQ_ORDERED_COLOR, /* is before or after barrier */
|
||||
__REQ_ALLOCED, /* request came from our alloc pool */
|
||||
__REQ_COPY_USER, /* contains copies of user pages */
|
||||
__REQ_INTEGRITY, /* integrity metadata has been remapped */
|
||||
__REQ_FLUSH, /* request for cache flush */
|
||||
__REQ_IO_STAT, /* account I/O stat */
|
||||
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
|
||||
|
@ -172,6 +174,7 @@ enum rq_flag_bits {
|
|||
|
||||
#define REQ_UNPLUG (1 << __REQ_UNPLUG)
|
||||
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
|
||||
#define REQ_THROTTLED (1 << __REQ_THROTTLED)
|
||||
|
||||
#define REQ_SORTED (1 << __REQ_SORTED)
|
||||
#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
|
||||
|
@ -187,7 +190,6 @@ enum rq_flag_bits {
|
|||
#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
|
||||
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
|
||||
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
|
||||
#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
|
||||
#define REQ_FLUSH (1 << __REQ_FLUSH)
|
||||
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
|
||||
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
|
||||
|
|
|
@ -115,6 +115,7 @@ struct request {
|
|||
void *elevator_private3;
|
||||
|
||||
struct gendisk *rq_disk;
|
||||
struct hd_struct *part;
|
||||
unsigned long start_time;
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
unsigned long long start_time_ns;
|
||||
|
@ -124,6 +125,9 @@ struct request {
|
|||
* physical address coalescing is performed.
|
||||
*/
|
||||
unsigned short nr_phys_segments;
|
||||
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
||||
unsigned short nr_integrity_segments;
|
||||
#endif
|
||||
|
||||
unsigned short ioprio;
|
||||
|
||||
|
@ -243,6 +247,7 @@ struct queue_limits {
|
|||
|
||||
unsigned short logical_block_size;
|
||||
unsigned short max_segments;
|
||||
unsigned short max_integrity_segments;
|
||||
|
||||
unsigned char misaligned;
|
||||
unsigned char discard_misaligned;
|
||||
|
@ -367,6 +372,11 @@ struct request_queue
|
|||
#if defined(CONFIG_BLK_DEV_BSG)
|
||||
struct bsg_class_device bsg_dev;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING
|
||||
/* Throttle data */
|
||||
struct throtl_data *td;
|
||||
#endif
|
||||
};
|
||||
|
||||
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
|
||||
|
@ -851,7 +861,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
|
|||
extern void blk_queue_max_discard_sectors(struct request_queue *q,
|
||||
unsigned int max_discard_sectors);
|
||||
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
|
||||
extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
|
||||
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
|
||||
extern void blk_queue_alignment_offset(struct request_queue *q,
|
||||
unsigned int alignment);
|
||||
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
|
||||
|
@ -1004,7 +1014,7 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q)
|
|||
return q->limits.physical_block_size;
|
||||
}
|
||||
|
||||
static inline int bdev_physical_block_size(struct block_device *bdev)
|
||||
static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
|
||||
{
|
||||
return queue_physical_block_size(bdev_get_queue(bdev));
|
||||
}
|
||||
|
@ -1093,11 +1103,11 @@ static inline int queue_dma_alignment(struct request_queue *q)
|
|||
return q ? q->dma_alignment : 511;
|
||||
}
|
||||
|
||||
static inline int blk_rq_aligned(struct request_queue *q, void *addr,
|
||||
static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
|
||||
unsigned int len)
|
||||
{
|
||||
unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
|
||||
return !((unsigned long)addr & alignment) && !(len & alignment);
|
||||
return !(addr & alignment) && !(len & alignment);
|
||||
}
|
||||
|
||||
/* assumes size > 256 */
|
||||
|
@ -1127,6 +1137,7 @@ static inline void put_dev_sector(Sector p)
|
|||
|
||||
struct work_struct;
|
||||
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
|
||||
int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
/*
|
||||
|
@ -1170,6 +1181,24 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING
|
||||
extern int blk_throtl_init(struct request_queue *q);
|
||||
extern void blk_throtl_exit(struct request_queue *q);
|
||||
extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
|
||||
extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay);
|
||||
extern void throtl_shutdown_timer_wq(struct request_queue *q);
|
||||
#else /* CONFIG_BLK_DEV_THROTTLING */
|
||||
static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
|
||||
/* No-op stub; returns void to match the CONFIG_BLK_DEV_THROTTLING prototype. */
static inline void blk_throtl_exit(struct request_queue *q) { }
|
||||
static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {}
|
||||
static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
|
||||
#endif /* CONFIG_BLK_DEV_THROTTLING */
|
||||
|
||||
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
|
||||
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
|
||||
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
|
||||
|
@ -1213,8 +1242,13 @@ struct blk_integrity {
|
|||
extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
|
||||
extern void blk_integrity_unregister(struct gendisk *);
|
||||
extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
|
||||
extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
|
||||
extern int blk_rq_count_integrity_sg(struct request *);
|
||||
extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
|
||||
struct scatterlist *);
|
||||
extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
|
||||
extern int blk_integrity_merge_rq(struct request_queue *, struct request *,
|
||||
struct request *);
|
||||
extern int blk_integrity_merge_bio(struct request_queue *, struct request *,
|
||||
struct bio *);
|
||||
|
||||
static inline
|
||||
struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
|
||||
|
@ -1235,16 +1269,32 @@ static inline int blk_integrity_rq(struct request *rq)
|
|||
return bio_integrity(rq->bio);
|
||||
}
|
||||
|
||||
/*
 * Store @segs in q->limits.max_integrity_segments.
 * NOTE(review): @segs is unsigned int while queue_max_integrity_segments()
 * returns unsigned short - confirm the field width to rule out truncation.
 */
static inline void blk_queue_max_integrity_segments(struct request_queue *q,
|
||||
unsigned int segs)
|
||||
{
|
||||
q->limits.max_integrity_segments = segs;
|
||||
}
|
||||
|
||||
/* Return the value currently held in q->limits.max_integrity_segments. */
static inline unsigned short
|
||||
queue_max_integrity_segments(struct request_queue *q)
|
||||
{
|
||||
return q->limits.max_integrity_segments;
|
||||
}
|
||||
|
||||
#else /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
|
||||
#define blk_integrity_rq(rq) (0)
|
||||
#define blk_rq_count_integrity_sg(a) (0)
|
||||
#define blk_rq_map_integrity_sg(a, b) (0)
|
||||
#define blk_rq_count_integrity_sg(a, b) (0)
|
||||
#define blk_rq_map_integrity_sg(a, b, c) (0)
|
||||
#define bdev_get_integrity(a) (0)
|
||||
#define blk_get_integrity(a) (0)
|
||||
#define blk_integrity_compare(a, b) (0)
|
||||
#define blk_integrity_register(a, b) (0)
|
||||
/* No trailing semicolon: the caller supplies it, so the stub is safe in an if/else body. */
#define blk_integrity_unregister(a)		do { } while (0)
|
||||
/* No trailing semicolon: the caller supplies it, so the stub is safe in an if/else body. */
#define blk_queue_max_integrity_segments(a, b)	do { } while (0)
|
||||
#define queue_max_integrity_segments(a) (0)
|
||||
#define blk_integrity_merge_rq(a, b, c) (0)
|
||||
#define blk_integrity_merge_bio(a, b, c) (0)
|
||||
|
||||
#endif /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
|
||||
|
|
|
@ -122,6 +122,8 @@ extern void elv_completed_request(struct request_queue *, struct request *);
|
|||
extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
|
||||
extern void elv_put_request(struct request_queue *, struct request *);
|
||||
extern void elv_drain_elevator(struct request_queue *);
|
||||
extern void elv_quiesce_start(struct request_queue *);
|
||||
extern void elv_quiesce_end(struct request_queue *);
|
||||
|
||||
/*
|
||||
* io scheduler registration
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/types.h>
|
||||
#include <linux/kdev_t.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
|
||||
|
@ -86,7 +87,15 @@ struct disk_stats {
|
|||
unsigned long io_ticks;
|
||||
unsigned long time_in_queue;
|
||||
};
|
||||
|
||||
|
||||
#define PARTITION_META_INFO_VOLNAMELTH 64
|
||||
#define PARTITION_META_INFO_UUIDLTH 16
|
||||
|
||||
struct partition_meta_info {
|
||||
u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */
|
||||
u8 volname[PARTITION_META_INFO_VOLNAMELTH];
|
||||
};
|
||||
|
||||
struct hd_struct {
|
||||
sector_t start_sect;
|
||||
sector_t nr_sects;
|
||||
|
@ -95,6 +104,7 @@ struct hd_struct {
|
|||
struct device __dev;
|
||||
struct kobject *holder_dir;
|
||||
int policy, partno;
|
||||
struct partition_meta_info *info;
|
||||
#ifdef CONFIG_FAIL_MAKE_REQUEST
|
||||
int make_it_fail;
|
||||
#endif
|
||||
|
@ -130,6 +140,7 @@ struct disk_part_tbl {
|
|||
struct rcu_head rcu_head;
|
||||
int len;
|
||||
struct hd_struct __rcu *last_lookup;
|
||||
struct gendisk *disk;
|
||||
struct hd_struct __rcu *part[];
|
||||
};
|
||||
|
||||
|
@ -181,6 +192,30 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Pack a textual UUID into 16 binary bytes.
 *
 * @uuid_str: text to read; two characters are consumed per output byte and
 *            one extra character is skipped after output bytes 3, 5, 7 and 9
 *            (36 characters are read in total).
 * @to:       destination buffer; exactly 16 bytes are written.
 *
 * No validation is done here: the skipped separator character is not
 * inspected and the hex_to_bin() results are used unchecked.
 */
static inline void part_pack_uuid(const u8 *uuid_str, u8 *to)
{
	int i;

	for (i = 0; i < 16; ++i) {
		*to++ = (hex_to_bin(uuid_str[0]) << 4) |
			(hex_to_bin(uuid_str[1]));
		uuid_str += 2;

		/* Step over the separator that follows these byte positions. */
		if (i == 3 || i == 5 || i == 7 || i == 9)
			uuid_str++;
	}
}
|
||||
|
||||
/*
 * Format @uuid into @out with the "%pU" format and return @out.
 * sprintf() is unbounded, so the caller must supply a buffer large enough
 * for the formatted text plus the terminating NUL.
 */
static inline char *part_unpack_uuid(const u8 *uuid, char *out)
|
||||
{
|
||||
sprintf(out, "%pU", uuid);
|
||||
return out;
|
||||
}
|
||||
|
||||
static inline int disk_max_parts(struct gendisk *disk)
|
||||
{
|
||||
if (disk->flags & GENHD_FL_EXT_DEVT)
|
||||
|
@ -342,6 +377,19 @@ static inline int part_in_flight(struct hd_struct *part)
|
|||
return part->in_flight[0] + part->in_flight[1];
|
||||
}
|
||||
|
||||
static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
|
||||
{
|
||||
if (disk)
|
||||
return kzalloc_node(sizeof(struct partition_meta_info),
|
||||
GFP_KERNEL, disk->node_id);
|
||||
return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL);
|
||||
}
|
||||
|
||||
/* Release part->info with kfree(); the pointer stored in @part is not reset. */
static inline void free_part_info(struct hd_struct *part)
|
||||
{
|
||||
kfree(part->info);
|
||||
}
|
||||
|
||||
/* block/blk-core.c */
|
||||
extern void part_round_stats(int cpu, struct hd_struct *part);
|
||||
|
||||
|
@ -533,7 +581,9 @@ extern int disk_expand_part_tbl(struct gendisk *disk, int target);
|
|||
extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
|
||||
extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
|
||||
int partno, sector_t start,
|
||||
sector_t len, int flags);
|
||||
sector_t len, int flags,
|
||||
struct partition_meta_info
|
||||
*info);
|
||||
extern void delete_partition(struct gendisk *, int);
|
||||
extern void printk_all_partitions(void);
|
||||
|
||||
|
|
|
@ -651,6 +651,16 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
|
|||
(void) (&_max1 == &_max2); \
|
||||
_max1 > _max2 ? _max1 : _max2; })
|
||||
|
||||
/**
 * min_not_zero - return the minimum that is _not_ zero, unless both are zero
 * @x: value1
 * @y: value2
 *
 * Each argument is evaluated exactly once. When only one value is zero the
 * other one is returned; when both are zero the result is zero.
 */
#define min_not_zero(x, y) ({						\
	typeof(x) __x = (x);						\
	typeof(y) __y = (y);						\
	(__x != 0 && __y != 0) ? min(__x, __y) : (__x != 0 ? __x : __y); })
|
||||
|
||||
/**
|
||||
* clamp - return a value clamped to a given range with strict typechecking
|
||||
* @val: current value
|
||||
|
|
|
@ -336,6 +336,9 @@ extern unsigned long sysctl_hung_task_warnings;
|
|||
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
|
||||
void __user *buffer,
|
||||
size_t *lenp, loff_t *ppos);
|
||||
#else
|
||||
/* Avoid need for ifdefs elsewhere in the code */
|
||||
enum { sysctl_hung_task_timeout_secs = 0 };
|
||||
#endif
|
||||
|
||||
/* Attach to any functions which should be ignored in wchan output. */
|
||||
|
|
|
@ -31,6 +31,12 @@ struct scsi_cmnd;
|
|||
#define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS
|
||||
#endif
|
||||
|
||||
/*
|
||||
* DIX-capable adapters effectively support infinite chaining for the
|
||||
* protection information scatterlist
|
||||
*/
|
||||
#define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF
|
||||
|
||||
/*
|
||||
* Special value for scanning to specify scanning or rescanning of all
|
||||
* possible channels, (target) ids, or luns on a given shost.
|
||||
|
|
|
@ -388,6 +388,7 @@ struct scsi_host_template {
|
|||
* of scatter-gather.
|
||||
*/
|
||||
unsigned short sg_tablesize;
|
||||
unsigned short sg_prot_tablesize;
|
||||
|
||||
/*
|
||||
* Set this if the host adapter has limitations beside segment count.
|
||||
|
@ -599,6 +600,7 @@ struct Scsi_Host {
|
|||
int can_queue;
|
||||
short cmd_per_lun;
|
||||
short unsigned int sg_tablesize;
|
||||
short unsigned int sg_prot_tablesize;
|
||||
short unsigned int max_sectors;
|
||||
unsigned long dma_boundary;
|
||||
/*
|
||||
|
@ -823,6 +825,11 @@ static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost)
|
|||
return shost->prot_capabilities;
|
||||
}
|
||||
|
||||
/*
 * Return nonzero when shost->prot_capabilities is at least
 * SHOST_DIX_TYPE0_PROTECTION, zero otherwise.
 * NOTE(review): presumably this means the host advertises a DIX capability -
 * confirm against the SHOST_* definitions.
 */
static inline int scsi_host_prot_dma(struct Scsi_Host *shost)
|
||||
{
|
||||
return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION;
|
||||
}
|
||||
|
||||
static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type)
|
||||
{
|
||||
static unsigned char cap[] = { 0,
|
||||
|
|
|
@ -661,11 +661,14 @@ config BLK_CGROUP
|
|||
|
||||
Currently, CFQ IO scheduler uses it to recognize task groups and
|
||||
control disk bandwidth allocation (proportional time slice allocation)
|
||||
to such task groups.
|
||||
to such task groups. It is also used by bio throttling logic in
|
||||
block layer to implement upper limit in IO rates on a device.
|
||||
|
||||
This option only enables generic Block IO controller infrastructure.
|
||||
One needs to also enable actual IO controlling logic in CFQ for it
|
||||
to take effect. (CONFIG_CFQ_GROUP_IOSCHED=y).
|
||||
One needs to also enable actual IO controlling logic/policy. For
|
||||
enabling proportional weight division of disk bandwidth in CFQ set
|
||||
CONFIG_CFQ_GROUP_IOSCHED=y and for enabling throttling policy set
|
||||
CONFIG_BLK_DEV_THROTTLING=y.
|
||||
|
||||
See Documentation/cgroups/blkio-controller.txt for more information.
|
||||
|
||||
|
|
|
@ -58,6 +58,62 @@ static int __init readwrite(char *str)
|
|||
__setup("ro", readonly);
|
||||
__setup("rw", readwrite);
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
/**
|
||||
* match_dev_by_uuid - callback for finding a partition using its uuid
|
||||
* @dev: device passed in by the caller
|
||||
* @data: opaque pointer to a 36 byte char array with a UUID
|
||||
*
|
||||
* Returns 1 if the device matches, and 0 otherwise.
|
||||
*/
|
||||
static int match_dev_by_uuid(struct device *dev, void *data)
|
||||
{
|
||||
u8 *uuid = data;
|
||||
struct hd_struct *part = dev_to_part(dev);
|
||||
|
||||
if (!part->info)
|
||||
goto no_match;
|
||||
|
||||
if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
|
||||
goto no_match;
|
||||
|
||||
return 1;
|
||||
no_match:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* devt_from_partuuid - looks up the dev_t of a partition by its UUID
|
||||
* @uuid: 36 byte char array containing a hex ascii UUID
|
||||
*
|
||||
* The function will return the first partition which contains a matching
|
||||
* UUID value in its partition_meta_info struct. This does not search
|
||||
* by filesystem UUIDs.
|
||||
*
|
||||
* Returns the matching dev_t on success or 0 on failure.
|
||||
*/
|
||||
static dev_t __init devt_from_partuuid(char *uuid_str)
|
||||
{
|
||||
dev_t res = 0;
|
||||
struct device *dev = NULL;
|
||||
u8 uuid[16];
|
||||
|
||||
/* Pack the requested UUID in the expected format. */
|
||||
part_pack_uuid(uuid_str, uuid);
|
||||
|
||||
dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
|
||||
if (!dev)
|
||||
goto done;
|
||||
|
||||
res = dev->devt;
|
||||
put_device(dev);
|
||||
|
||||
done:
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Convert a name into device number. We accept the following variants:
|
||||
*
|
||||
|
@ -68,6 +124,8 @@ __setup("rw", readwrite);
|
|||
* of partition - device number of disk plus the partition number
|
||||
* 5) /dev/<disk_name>p<decimal> - same as the above, that form is
|
||||
* used when disk name of partitioned disk ends on a digit.
|
||||
* 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
|
||||
* unique id of a partition if the partition table provides it.
|
||||
*
|
||||
* If name doesn't fall into the categories above, we return (0,0).
|
||||
* block_class is used to check if something is a disk name. If the disk
|
||||
|
@ -82,6 +140,18 @@ dev_t name_to_dev_t(char *name)
|
|||
dev_t res = 0;
|
||||
int part;
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
if (strncmp(name, "PARTUUID=", 9) == 0) {
|
||||
name += 9;
|
||||
if (strlen(name) != 36)
|
||||
goto fail;
|
||||
res = devt_from_partuuid(name);
|
||||
if (!res)
|
||||
goto fail;
|
||||
goto done;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (strncmp(name, "/dev/", 5) != 0) {
|
||||
unsigned maj, min;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue