Tag: for-5.14/block-2021-06-29

-----BEGIN PGP SIGNATURE-----
iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmDbXAwQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpr0HEADDJaSgjpnWQwH1RVLNagJa9KnktxZYsEs+
as3QmDdpKRG3rEC9bdE7FLe/xq3WBaO5j1hTQ9P6IguqLyS1Df72DtTlKyaCrZoe
zv9eIlY4lZUfksE2nzWmlN9uG0FBVXeEQpHCLSNbUZeK1zvV6+NNhQqw2kc0sEqu
hReUFeMUbsMcu/w5T3XMVJNsTMCql9wta2H0q5hONQyJQSrIwa1D+sUdE5I8fO4j
bnoYX9yxHX26EztX1UJiGRgoq5Trz7LY7hAfljKSkewpFwiHE2vBdq2L0C2RKsIV
tTs2DjMCMQyPNeA7WAG8HlR4aPG+7+/fuBP1KJHkykjWXglWN7OqISuBv6rrBgQs
gNRnZ4qmb1CzD6aLEBk59nHt6po6eMxXIW856YktKy8rKcrgK29qP44Z+oomkPKo
ZjQ0wqN5CvpObM/dIKxl9bAJ4zQDHBt49d5nTTQLfWl/mgevu6ZNWD/hONyCQmFy
zKKqQ/wkxWHutOsjC5/MKNb3ZRNH9tt9X+HfULO2DU6IqqifYw/ex4z4MVsBopJC
7pPfd81kgC73TgXe1AaCwHqNWsrqYCuTK0ew1CtGudlS3lucMwtap4GBiCgg5gbu
M8pEgwO4OcCLHyRUc8zdfqI7HumbprbFmojPkwGSEe0ofVD74lMhzbUj5jvTYY2B
t8D2XcgyOA==
=lhon
-----END PGP SIGNATURE-----

Merge tag 'for-5.14/block-2021-06-29' of git://git.kernel.dk/linux-block

Pull core block updates from Jens Axboe:

 - disk events cleanup (Christoph)
 - gendisk and request queue allocation simplifications (Christoph)
 - bdev_disk_changed cleanups (Christoph)
 - IO priority improvements (Bart)
 - Chained bio completion trace fix (Edward)
 - blk-wbt fixes (Jan)
 - blk-wbt enable/disable fix (Zhang)
 - Scheduler dispatch improvements (Jan, Ming)
 - Shared tagset scheduler improvements (John)
 - BFQ updates (Paolo, Luca, Pietro)
 - BFQ lock inversion fix (Jan)
 - Documentation improvements (Kir)
 - CLONE_IO block cgroup fix (Tejun)
 - Remove of ancient and deprecated block dump feature (zhangyi)
 - Discard merge fix (Ming)
 - Misc fixes or followup fixes (Colin, Damien, Dan, Long, Max, Thomas, Yang)

* tag 'for-5.14/block-2021-06-29' of git://git.kernel.dk/linux-block: (129 commits)
  block: fix discard request merge
  block/mq-deadline: Remove a WARN_ON_ONCE() call
  blk-mq: update hctx->dispatch_busy in case of real scheduler
  blk: Fix lock inversion between ioc lock and bfqd lock
  bfq: Remove merged request already in bfq_requests_merged()
  block: pass a gendisk to bdev_disk_changed
  block: move bdev_disk_changed
  block: add the events* attributes to disk_attrs
  block: move the disk events code to a separate file
  block: fix trace completion for chained bio
  block/partitions/msdos: Fix typo inidicator -> indicator
  block, bfq: reset waker pointer with shared queues
  block, bfq: check waker only for queues with no in-flight I/O
  block, bfq: avoid delayed merge of async queues
  block, bfq: boost throughput by extending queue-merging times
  block, bfq: consider also creation time in delayed stable merge
  block, bfq: fix delayed stable merge check
  block, bfq: let also stably merged queues enjoy weight raising
  blk-wbt: make sure throttle is enabled properly
  blk-wbt: introduce a new disable state to prevent false positive by rwb_enabled()
  ...

This commit is contained in: commit df668a5fe4
@@ -17,36 +17,37 @@ level logical devices like device mapper.

HOWTO
=====

Throttling/Upper Limit policy
-----------------------------
- Enable Block IO controller::
Enable Block IO controller::

    CONFIG_BLK_CGROUP=y

- Enable throttling in block layer::
Enable throttling in block layer::

    CONFIG_BLK_DEV_THROTTLING=y

- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::

    mount -t cgroup -o blkio none /sys/fs/cgroup/blkio

- Specify a bandwidth rate on particular device for root group. The format
  for policy is "<major>:<minor> <bytes_per_second>"::
Specify a bandwidth rate on particular device for root group. The format
for policy is "<major>:<minor> <bytes_per_second>"::

    echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device

Above will put a limit of 1MB/second on reads happening for root group
on device having major/minor number 8:16.
This will put a limit of 1MB/second on reads happening for root group
on device having major/minor number 8:16.

- Run dd to read a file and see if rate is throttled to 1MB/s or not::
Run dd to read a file and see if rate is throttled to 1MB/s or not::

    # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
    1024+0 records in
    1024+0 records out
    4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s

Limits for writes can be put using blkio.throttle.write_bps_device file.
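The same rules can also be installed from a program by writing the
"<major>:<minor> <bytes_per_second>" string into the corresponding throttle
file. A minimal C sketch, assuming the v1 blkio hierarchy is mounted at
/sys/fs/cgroup/blkio and that 8:16 is the target device as in the example
above (both are assumptions, not part of the original text):

```c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Install a 1 MB/s write limit for device 8:16 in the root blkio cgroup.
 * Path and device numbers are only illustrative; adjust them to the
 * actual mount point and device on the system.
 */
int main(void)
{
    const char *path =
        "/sys/fs/cgroup/blkio/blkio.throttle.write_bps_device";
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* Same format as the echo command: "<major>:<minor> <bytes_per_second>" */
    if (dprintf(fd, "8:16 %d\n", 1048576) < 0)
        perror("write");
    close(fd);
    return 0;
}
```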
Hierarchical Cgroups
====================
@@ -79,85 +80,89 @@ following::

Various user visible config options
===================================
CONFIG_BLK_CGROUP
    - Block IO controller.

CONFIG_BFQ_CGROUP_DEBUG
    - Debug help. Right now some additional stats files show up in cgroup
CONFIG_BLK_CGROUP
    Block IO controller.

CONFIG_BFQ_CGROUP_DEBUG
    Debug help. Right now some additional stats files show up in cgroup
    if this option is enabled.

CONFIG_BLK_DEV_THROTTLING
    - Enable block device throttling support in block layer.
CONFIG_BLK_DEV_THROTTLING
    Enable block device throttling support in block layer.

Details of cgroup files
=======================

Proportional weight policy files
--------------------------------
- blkio.weight
  - Specifies per cgroup weight. This is default weight of the group
    on all the devices until and unless overridden by per device rule.
    (See blkio.weight_device).
    Currently allowed range of weights is from 10 to 1000.

- blkio.weight_device
  - One can specify per cgroup per device rules using this interface.
    These rules override the default value of group weight as specified
    by blkio.weight.
blkio.bfq.weight
    Specifies per cgroup weight. This is default weight of the group
    on all the devices until and unless overridden by per device rule
    (see `blkio.bfq.weight_device` below).

    Currently allowed range of weights is from 1 to 1000. For more details,
    see Documentation/block/bfq-iosched.rst.

blkio.bfq.weight_device
    Specifies per cgroup per device weights, overriding the default group
    weight. For more details, see Documentation/block/bfq-iosched.rst.

Following is the format::
|
||||
|
||||
# echo dev_maj:dev_minor weight > blkio.weight_device
|
||||
# echo dev_maj:dev_minor weight > blkio.bfq.weight_device
|
||||
|
||||
Configure weight=300 on /dev/sdb (8:16) in this cgroup::
|
||||
|
||||
# echo 8:16 300 > blkio.weight_device
|
||||
# cat blkio.weight_device
|
||||
# echo 8:16 300 > blkio.bfq.weight_device
|
||||
# cat blkio.bfq.weight_device
|
||||
dev weight
|
||||
8:16 300
|
||||
|
||||
Configure weight=500 on /dev/sda (8:0) in this cgroup::
|
||||
|
||||
# echo 8:0 500 > blkio.weight_device
|
||||
# cat blkio.weight_device
|
||||
# echo 8:0 500 > blkio.bfq.weight_device
|
||||
# cat blkio.bfq.weight_device
|
||||
dev weight
|
||||
8:0 500
|
||||
8:16 300
|
||||
|
||||
Remove specific weight for /dev/sda in this cgroup::
|
||||
|
||||
# echo 8:0 0 > blkio.weight_device
|
||||
# cat blkio.weight_device
|
||||
# echo 8:0 0 > blkio.bfq.weight_device
|
||||
# cat blkio.bfq.weight_device
|
||||
dev weight
|
||||
8:16 300
|
||||
|
||||
- blkio.time
|
||||
- disk time allocated to cgroup per device in milliseconds. First
|
||||
blkio.time
|
||||
Disk time allocated to cgroup per device in milliseconds. First
|
||||
two fields specify the major and minor number of the device and
|
||||
third field specifies the disk time allocated to group in
|
||||
milliseconds.
|
||||
|
||||
- blkio.sectors
|
||||
- number of sectors transferred to/from disk by the group. First
|
||||
blkio.sectors
|
||||
Number of sectors transferred to/from disk by the group. First
|
||||
two fields specify the major and minor number of the device and
|
||||
third field specifies the number of sectors transferred by the
|
||||
group to/from the device.
|
||||
|
||||
- blkio.io_service_bytes
|
||||
- Number of bytes transferred to/from the disk by the group. These
|
||||
blkio.io_service_bytes
|
||||
Number of bytes transferred to/from the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
specifies the number of bytes.
|
||||
|
||||
- blkio.io_serviced
|
||||
- Number of IOs (bio) issued to the disk by the group. These
|
||||
blkio.io_serviced
|
||||
Number of IOs (bio) issued to the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
specifies the number of IOs.
|
||||
|
||||
- blkio.io_service_time
|
||||
- Total amount of time between request dispatch and request completion
|
||||
blkio.io_service_time
|
||||
Total amount of time between request dispatch and request completion
|
||||
for the IOs done by this cgroup. This is in nanoseconds to make it
|
||||
meaningful for flash devices too. For devices with queue depth of 1,
|
||||
this time represents the actual service time. When queue_depth > 1,
|
||||
@ -170,8 +175,8 @@ Proportional weight policy files
|
||||
specifies the operation type and the fourth field specifies the
|
||||
io_service_time in ns.
|
||||
|
||||
- blkio.io_wait_time
|
||||
- Total amount of time the IOs for this cgroup spent waiting in the
|
||||
blkio.io_wait_time
|
||||
Total amount of time the IOs for this cgroup spent waiting in the
|
||||
scheduler queues for service. This can be greater than the total time
|
||||
elapsed since it is cumulative io_wait_time for all IOs. It is not a
|
||||
measure of total time the cgroup spent waiting but rather a measure of
|
||||
@ -185,24 +190,24 @@ Proportional weight policy files
|
||||
minor number of the device, third field specifies the operation type
|
||||
and the fourth field specifies the io_wait_time in ns.
|
||||
|
||||
- blkio.io_merged
|
||||
- Total number of bios/requests merged into requests belonging to this
|
||||
blkio.io_merged
|
||||
Total number of bios/requests merged into requests belonging to this
|
||||
cgroup. This is further divided by the type of operation - read or
|
||||
write, sync or async.
|
||||
|
||||
- blkio.io_queued
|
||||
- Total number of requests queued up at any given instant for this
|
||||
blkio.io_queued
|
||||
Total number of requests queued up at any given instant for this
|
||||
cgroup. This is further divided by the type of operation - read or
|
||||
write, sync or async.
|
||||
|
||||
- blkio.avg_queue_size
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.avg_queue_size
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
The average queue size for this cgroup over the entire time of this
|
||||
cgroup's existence. Queue size samples are taken each time one of the
|
||||
queues of this cgroup gets a timeslice.
|
||||
|
||||
- blkio.group_wait_time
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.group_wait_time
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
This is the amount of time the cgroup had to wait since it became busy
|
||||
(i.e., went from 0 to 1 request queued) to get a timeslice for one of
|
||||
its queues. This is different from the io_wait_time which is the
|
||||
@ -212,8 +217,8 @@ Proportional weight policy files
|
||||
will only report the group_wait_time accumulated till the last time it
|
||||
got a timeslice and will not include the current delta.
|
||||
|
||||
- blkio.empty_time
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.empty_time
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
This is the amount of time a cgroup spends without any pending
|
||||
requests when not being served, i.e., it does not include any time
|
||||
spent idling for one of the queues of the cgroup. This is in
|
||||
@ -221,8 +226,8 @@ Proportional weight policy files
|
||||
the stat will only report the empty_time accumulated till the last
|
||||
time it had a pending request and will not include the current delta.
|
||||
|
||||
- blkio.idle_time
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.idle_time
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
This is the amount of time spent by the IO scheduler idling for a
|
||||
given cgroup in anticipation of a better request than the existing ones
|
||||
from other queues/cgroups. This is in nanoseconds. If this is read
|
||||
@ -230,60 +235,60 @@ Proportional weight policy files
|
||||
idle_time accumulated till the last idle period and will not include
|
||||
the current delta.
|
||||
|
||||
- blkio.dequeue
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
|
||||
blkio.dequeue
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
|
||||
gives the statistics about how many times a group was dequeued
|
||||
from service tree of the device. First two fields specify the major
|
||||
and minor number of the device and third field specifies the number
|
||||
of times a group was dequeued from a particular device.
|
||||
|
||||
- blkio.*_recursive
|
||||
- Recursive version of various stats. These files show the
|
||||
blkio.*_recursive
|
||||
Recursive version of various stats. These files show the
|
||||
same information as their non-recursive counterparts but
|
||||
include stats from all the descendant cgroups.
|
||||
|
||||
Throttling/Upper limit policy files
|
||||
-----------------------------------
|
||||
- blkio.throttle.read_bps_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
blkio.throttle.read_bps_device
|
||||
Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
|
||||
|
||||
- blkio.throttle.write_bps_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
blkio.throttle.write_bps_device
|
||||
Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
|
||||
|
||||
- blkio.throttle.read_iops_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
blkio.throttle.read_iops_device
|
||||
Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
|
||||
|
||||
- blkio.throttle.write_iops_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
blkio.throttle.write_iops_device
|
||||
Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in io per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
|
||||
|
||||
Note: If both BW and IOPS rules are specified for a device, then IO is
subjected to both the constraints.
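Because both constraints apply simultaneously, the effective throughput is
bounded by whichever rule is hit first. A small illustrative user-space
helper (not kernel code; the numbers are only an example) that computes the
resulting ceiling for a fixed request size:

```c
#include <stdio.h>

/*
 * Effective throughput when both a bandwidth and an IOPS rule apply:
 * the IO stream has to satisfy both constraints, so the lower of the
 * two equivalent byte rates wins.
 */
static unsigned long long effective_bps(unsigned long long bps_limit,
                                        unsigned long long iops_limit,
                                        unsigned long long io_size)
{
    unsigned long long iops_as_bps = iops_limit * io_size;

    return bps_limit < iops_as_bps ? bps_limit : iops_as_bps;
}

int main(void)
{
    /* 1 MB/s BW rule, 100 IOPS rule, 4 KiB requests -> 409600 bytes/s */
    printf("%llu\n", effective_bps(1048576, 100, 4096));
    return 0;
}
```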
- blkio.throttle.io_serviced
|
||||
- Number of IOs (bio) issued to the disk by the group. These
|
||||
blkio.throttle.io_serviced
|
||||
Number of IOs (bio) issued to the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
specifies the number of IOs.
|
||||
|
||||
- blkio.throttle.io_service_bytes
|
||||
- Number of bytes transferred to/from the disk by the group. These
|
||||
blkio.throttle.io_service_bytes
|
||||
Number of bytes transferred to/from the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
@ -291,6 +296,6 @@ Note: If both BW and IOPS rules are specified for a device, then IO is
|
||||
|
||||
Common files among various policies
|
||||
-----------------------------------
|
||||
- blkio.reset_stats
|
||||
- Writing an int to this file will result in resetting all the stats
|
||||
blkio.reset_stats
|
||||
Writing an int to this file will result in resetting all the stats
|
||||
for that cgroup.
|
||||
|
@@ -56,6 +56,7 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
       5-3-3. IO Latency
         5-3-3-1. How IO Latency Throttling Works
         5-3-3-2. IO Latency Interface Files
       5-3-4. IO Priority
     5-4. PID
       5-4-1. PID Interface Files
     5-5. Cpuset
@@ -1866,6 +1867,60 @@ IO Latency Interface Files
          duration of time between evaluation events. Windows only elapse
          with IO activity. Idle periods extend the most recent window.

IO Priority
~~~~~~~~~~~

A single attribute controls the behavior of the I/O priority cgroup policy,
namely the blkio.prio.class attribute. The following values are accepted for
that attribute:

no-change
    Do not modify the I/O priority class.

none-to-rt
    For requests that do not have an I/O priority class (NONE),
    change the I/O priority class into RT. Do not modify
    the I/O priority class of other requests.

restrict-to-be
    For requests that do not have an I/O priority class or that have I/O
    priority class RT, change it into BE. Do not modify the I/O priority
    class of requests that have priority class IDLE.

idle
    Change the I/O priority class of all requests into IDLE, the lowest
    I/O priority class.

The following numerical values are associated with the I/O priority policies:

+----------------+---+
| no-change      | 0 |
+----------------+---+
| none-to-rt     | 1 |
+----------------+---+
| restrict-to-be | 2 |
+----------------+---+
| idle           | 3 |
+----------------+---+

The numerical value that corresponds to each I/O priority class is as follows:

+-------------------------------+---+
| IOPRIO_CLASS_NONE             | 0 |
+-------------------------------+---+
| IOPRIO_CLASS_RT (real-time)   | 1 |
+-------------------------------+---+
| IOPRIO_CLASS_BE (best effort) | 2 |
+-------------------------------+---+
| IOPRIO_CLASS_IDLE             | 3 |
+-------------------------------+---+

The algorithm to set the I/O priority class for a request is as follows:

- Translate the I/O priority class policy into a number.
- Change the request I/O priority class into the maximum of the I/O priority
  class policy number and the numerical I/O priority class.
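As a rough user-space illustration of the two steps above (not the in-kernel
implementation), the effective class is a plain max() over the numerical
values from the two tables. The POLICY_* numbers match the table; the CLASS_*
names and the helper function are made up for the example:

```c
/* Numerical values of the cgroup policy, as in the first table above. */
enum prio_policy_num {
    POLICY_NO_CHANGE = 0,       /* no-change */
    POLICY_NONE_TO_RT = 1,      /* none-to-rt */
    POLICY_RESTRICT_TO_BE = 2,  /* restrict-to-be */
    POLICY_ALL_TO_IDLE = 3,     /* idle */
};

/* Numerical values of the I/O priority classes, as in the second table. */
enum ioprio_class_num {
    CLASS_NONE = 0,             /* IOPRIO_CLASS_NONE */
    CLASS_RT = 1,               /* IOPRIO_CLASS_RT */
    CLASS_BE = 2,               /* IOPRIO_CLASS_BE */
    CLASS_IDLE = 3,             /* IOPRIO_CLASS_IDLE */
};

static inline int effective_ioprio_class(enum prio_policy_num policy,
                                         enum ioprio_class_num req_class)
{
    /*
     * e.g. none-to-rt (1) + NONE (0) -> RT (1),
     *      none-to-rt (1) + BE (2)   -> BE (2), i.e. left unchanged.
     */
    return (int)policy > (int)req_class ? (int)policy : (int)req_class;
}
```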
PID
---
@@ -101,17 +101,6 @@ this results in concentration of disk activity in a small time interval which
occurs only once every 10 minutes, or whenever the disk is forced to spin up by
a cache miss. The disk can then be spun down in the periods of inactivity.

If you want to find out which process caused the disk to spin up, you can
gather information by setting the flag /proc/sys/vm/block_dump. When this flag
is set, Linux reports all disk read and write operations that take place, and
all block dirtyings done to files. This makes it possible to debug why a disk
needs to spin up, and to increase battery life even more. The output of
block_dump is written to the kernel output, and it can be retrieved using
"dmesg". When you use block_dump and your kernel logging level also includes
kernel debugging messages, you probably want to turn off klogd, otherwise
the output of block_dump will be logged, causing disk activity that is not
normally there.


Configuration
-------------
@@ -25,7 +25,6 @@ files can be found in mm/swap.c.
Currently, these files are in /proc/sys/vm:

- admin_reserve_kbytes
- block_dump
- compact_memory
- compaction_proactiveness
- compact_unevictable_allowed

@@ -106,13 +105,6 @@ On x86_64 this is about 128MB.
Changing this takes effect whenever an application requests memory.


block_dump
==========

block_dump enables block I/O debugging when set to a nonzero value. More
information on block I/O debugging is in Documentation/admin-guide/laptops/laptop-mode.rst.


compact_memory
==============
@@ -553,20 +553,36 @@ throughput sustainable with bfq, because updating the blkio.bfq.*
stats is rather costly, especially for some of the stats enabled by
CONFIG_BFQ_CGROUP_DEBUG.

Parameters to set
-----------------
Parameters
----------

For each group, there is only the following parameter to set.
For each group, the following parameters can be set:

weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
group inside its parent. Available values: 1..1000 (default 100). The
linear mapping between ioprio and weights, described at the beginning
of the tunable section, is still valid, but all weights higher than
IOPRIO_BE_NR*10 are mapped to ioprio 0.
weight
    This specifies the default weight for the cgroup inside its parent.
    Available values: 1..1000 (default: 100).

Recall that, if low-latency is set, then BFQ automatically raises the
weight of the queues associated with interactive and soft real-time
applications. Unset this tunable if you need/want to control weights.
    For cgroup v1, it is set by writing the value to `blkio.bfq.weight`.

    For cgroup v2, it is set by writing the value to `io.bfq.weight`
    (with an optional prefix of `default` and a space).

    The linear mapping between ioprio and weights, described at the beginning
    of the tunable section, is still valid, but all weights higher than
    IOPRIO_BE_NR*10 are mapped to ioprio 0.

    Recall that, if low-latency is set, then BFQ automatically raises the
    weight of the queues associated with interactive and soft real-time
    applications. Unset this tunable if you need/want to control weights.

weight_device
    This specifies a per-device weight for the cgroup. The syntax is
    `major:minor weight`. A weight of `0` may be used to reset to the default
    weight.

    For cgroup v1, it is set by writing the value to `blkio.bfq.weight_device`.

    For cgroup v2, the file name is `io.bfq.weight`.
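For completeness, the same knobs can be driven from a program by writing to
the files named above. A minimal sketch, assuming a cgroup v2 group already
exists at /sys/fs/cgroup/grp (the group path is an assumption for the
example, not part of the documentation):

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Give an existing cgroup v2 group a default BFQ weight of 300 by writing
 * "default 300" to its io.bfq.weight file, as described above. Error
 * handling is deliberately minimal.
 */
int main(void)
{
    const char buf[] = "default 300\n";
    int fd = open("/sys/fs/cgroup/grp/io.bfq.weight", O_WRONLY);

    if (fd < 0) {
        perror("open io.bfq.weight");
        return 1;
    }
    if (write(fd, buf, strlen(buf)) < 0)
        perror("write");
    close(fd);
    return 0;
}
```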
[1]
|
||||
|
@@ -480,7 +480,7 @@ prototypes::

locking rules:

=======================  ===================
ops                      bd_mutex
ops                      open_mutex
=======================  ===================
open:                    yes
release:                 yes
@ -55,7 +55,6 @@ struct nfhd_device {
|
||||
int id;
|
||||
u32 blocks, bsize;
|
||||
int bshift;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *disk;
|
||||
};
|
||||
|
||||
@ -119,32 +118,24 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
|
||||
dev->bsize = bsize;
|
||||
dev->bshift = ffs(bsize) - 10;
|
||||
|
||||
dev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (dev->queue == NULL)
|
||||
goto free_dev;
|
||||
|
||||
blk_queue_logical_block_size(dev->queue, bsize);
|
||||
|
||||
dev->disk = alloc_disk(16);
|
||||
dev->disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!dev->disk)
|
||||
goto free_queue;
|
||||
goto free_dev;
|
||||
|
||||
dev->disk->major = major_num;
|
||||
dev->disk->first_minor = dev_id * 16;
|
||||
dev->disk->minors = 16;
|
||||
dev->disk->fops = &nfhd_ops;
|
||||
dev->disk->private_data = dev;
|
||||
sprintf(dev->disk->disk_name, "nfhd%u", dev_id);
|
||||
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
|
||||
dev->disk->queue = dev->queue;
|
||||
|
||||
blk_queue_logical_block_size(dev->disk->queue, bsize);
|
||||
add_disk(dev->disk);
|
||||
|
||||
list_add_tail(&dev->list, &nfhd_list);
|
||||
|
||||
return 0;
|
||||
|
||||
free_queue:
|
||||
blk_cleanup_queue(dev->queue);
|
||||
free_dev:
|
||||
kfree(dev);
|
||||
out:
|
||||
@ -186,8 +177,7 @@ static void __exit nfhd_exit(void)
|
||||
list_for_each_entry_safe(dev, next, &nfhd_list, list) {
|
||||
list_del(&dev->list);
|
||||
del_gendisk(dev->disk);
|
||||
put_disk(dev->disk);
|
||||
blk_cleanup_queue(dev->queue);
|
||||
blk_cleanup_disk(dev->disk);
|
||||
kfree(dev);
|
||||
}
|
||||
unregister_blkdev(major_num, "nfhd");
|
||||
|
@ -27,7 +27,6 @@
|
||||
struct simdisk {
|
||||
const char *filename;
|
||||
spinlock_t lock;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *gd;
|
||||
struct proc_dir_entry *procfile;
|
||||
int users;
|
||||
@ -266,21 +265,13 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
|
||||
spin_lock_init(&dev->lock);
|
||||
dev->users = 0;
|
||||
|
||||
dev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (dev->queue == NULL) {
|
||||
pr_err("blk_alloc_queue failed\n");
|
||||
goto out_alloc_queue;
|
||||
}
|
||||
|
||||
dev->gd = alloc_disk(SIMDISK_MINORS);
|
||||
if (dev->gd == NULL) {
|
||||
pr_err("alloc_disk failed\n");
|
||||
goto out_alloc_disk;
|
||||
}
|
||||
dev->gd = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!dev->gd)
|
||||
return -ENOMEM;
|
||||
dev->gd->major = simdisk_major;
|
||||
dev->gd->first_minor = which;
|
||||
dev->gd->minors = SIMDISK_MINORS;
|
||||
dev->gd->fops = &simdisk_ops;
|
||||
dev->gd->queue = dev->queue;
|
||||
dev->gd->private_data = dev;
|
||||
snprintf(dev->gd->disk_name, 32, "simdisk%d", which);
|
||||
set_capacity(dev->gd, 0);
|
||||
@ -288,12 +279,6 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
|
||||
|
||||
dev->procfile = proc_create_data(tmp, 0644, procdir, &simdisk_proc_ops, dev);
|
||||
return 0;
|
||||
|
||||
out_alloc_disk:
|
||||
blk_cleanup_queue(dev->queue);
|
||||
dev->queue = NULL;
|
||||
out_alloc_queue:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int __init simdisk_init(void)
|
||||
@ -343,10 +328,10 @@ static void simdisk_teardown(struct simdisk *dev, int which,
|
||||
char tmp[2] = { '0' + which, 0 };
|
||||
|
||||
simdisk_detach(dev);
|
||||
if (dev->gd)
|
||||
if (dev->gd) {
|
||||
del_gendisk(dev->gd);
|
||||
if (dev->queue)
|
||||
blk_cleanup_queue(dev->queue);
|
||||
blk_cleanup_disk(dev->gd);
|
||||
}
|
||||
remove_proc_entry(tmp, procdir);
|
||||
}
|
||||
|
||||
|
@ -133,6 +133,13 @@ config BLK_WBT
|
||||
dynamically on an algorithm loosely based on CoDel, factoring in
|
||||
the realtime performance of the disk.
|
||||
|
||||
config BLK_WBT_MQ
|
||||
bool "Enable writeback throttling by default"
|
||||
default y
|
||||
depends on BLK_WBT
|
||||
help
|
||||
Enable writeback throttling by default for request-based block devices.
|
||||
|
||||
config BLK_CGROUP_IOLATENCY
|
||||
bool "Enable support for latency based cgroup IO protection"
|
||||
depends on BLK_CGROUP=y
|
||||
@ -155,12 +162,14 @@ config BLK_CGROUP_IOCOST
|
||||
distributes IO capacity between different groups based on
|
||||
their share of the overall weight distribution.
|
||||
|
||||
config BLK_WBT_MQ
|
||||
bool "Multiqueue writeback throttling"
|
||||
default y
|
||||
depends on BLK_WBT
|
||||
config BLK_CGROUP_IOPRIO
|
||||
bool "Cgroup I/O controller for assigning an I/O priority class"
|
||||
depends on BLK_CGROUP
|
||||
help
|
||||
Enable writeback throttling by default on multiqueue devices.
|
||||
Enable the .prio interface for assigning an I/O priority class to
|
||||
requests. The I/O priority class affects the order in which an I/O
|
||||
scheduler and block devices process requests. Only some I/O schedulers
|
||||
and some block devices support I/O priorities.
|
||||
|
||||
config BLK_DEBUG_FS
|
||||
bool "Block layer debugging information in debugfs"
|
||||
|
@ -9,6 +9,12 @@ config MQ_IOSCHED_DEADLINE
|
||||
help
|
||||
MQ version of the deadline IO scheduler.
|
||||
|
||||
config MQ_IOSCHED_DEADLINE_CGROUP
|
||||
tristate
|
||||
default y
|
||||
depends on MQ_IOSCHED_DEADLINE
|
||||
depends on BLK_CGROUP
|
||||
|
||||
config MQ_IOSCHED_KYBER
|
||||
tristate "Kyber I/O scheduler"
|
||||
default y
|
||||
|
@ -8,7 +8,8 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
|
||||
blk-exec.o blk-merge.o blk-timeout.o \
|
||||
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
|
||||
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
|
||||
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
|
||||
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
|
||||
disk-events.o
|
||||
|
||||
obj-$(CONFIG_BOUNCE) += bounce.o
|
||||
obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
|
||||
@ -17,9 +18,12 @@ obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
|
||||
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
|
||||
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
|
||||
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
|
||||
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
|
||||
mq-deadline-y += mq-deadline-main.o
|
||||
mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o
|
||||
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
|
||||
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
|
||||
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
|
||||
|
@ -364,6 +364,16 @@ static int ref_wr_duration[2];
|
||||
*/
|
||||
static const unsigned long max_service_from_wr = 120000;
|
||||
|
||||
/*
|
||||
* Maximum time between the creation of two queues, for stable merge
|
||||
* to be activated (in ms)
|
||||
*/
|
||||
static const unsigned long bfq_activation_stable_merging = 600;
|
||||
/*
|
||||
* Minimum time to be waited before evaluating delayed stable merge (in ms)
|
||||
*/
|
||||
static const unsigned long bfq_late_stable_merging = 600;
|
||||
|
||||
#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0])
|
||||
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
|
||||
|
||||
@ -1729,10 +1739,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
bfqq->entity.new_weight == 40;
|
||||
*interactive = !in_burst && idle_for_long_time &&
|
||||
bfqq->entity.new_weight == 40;
|
||||
/*
|
||||
* Merged bfq_queues are kept out of weight-raising
|
||||
* (low-latency) mechanisms. The reason is that these queues
|
||||
* are usually created for non-interactive and
|
||||
* non-soft-real-time tasks. Yet this is not the case for
|
||||
* stably-merged queues. These queues are merged just because
|
||||
* they are created shortly after each other. So they may
|
||||
* easily serve the I/O of an interactive or soft-real time
|
||||
* application, if the application happens to spawn multiple
|
||||
* processes. So let also stably-merged queues enjoy weight
|
||||
* raising.
|
||||
*/
|
||||
wr_or_deserves_wr = bfqd->low_latency &&
|
||||
(bfqq->wr_coeff > 1 ||
|
||||
(bfq_bfqq_sync(bfqq) &&
|
||||
bfqq->bic && (*interactive || soft_rt)));
|
||||
(bfqq->bic || RQ_BIC(rq)->stably_merged) &&
|
||||
(*interactive || soft_rt)));
|
||||
|
||||
/*
|
||||
* Using the last flag, update budget and check whether bfqq
|
||||
@ -1962,14 +1985,18 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
|
||||
* Turning back to the detection of a waker queue, a queue Q is deemed
|
||||
* as a waker queue for bfqq if, for three consecutive times, bfqq
|
||||
* happens to become non empty right after a request of Q has been
|
||||
* completed. In particular, on the first time, Q is tentatively set
|
||||
* as a candidate waker queue, while on the third consecutive time
|
||||
* that Q is detected, the field waker_bfqq is set to Q, to confirm
|
||||
* that Q is a waker queue for bfqq. These detection steps are
|
||||
* performed only if bfqq has a long think time, so as to make it more
|
||||
* likely that bfqq's I/O is actually being blocked by a
|
||||
* synchronization. This last filter, plus the above three-times
|
||||
* requirement, make false positives less likely.
|
||||
* completed. In this respect, even if bfqq is empty, we do not check
|
||||
* for a waker if it still has some in-flight I/O. In fact, in this
|
||||
* case bfqq is actually still being served by the drive, and may
|
||||
* receive new I/O on the completion of some of the in-flight
|
||||
* requests. In particular, on the first time, Q is tentatively set as
|
||||
* a candidate waker queue, while on the third consecutive time that Q
|
||||
* is detected, the field waker_bfqq is set to Q, to confirm that Q is
|
||||
* a waker queue for bfqq. These detection steps are performed only if
|
||||
* bfqq has a long think time, so as to make it more likely that
|
||||
* bfqq's I/O is actually being blocked by a synchronization. This
|
||||
* last filter, plus the above three-times requirement, make false
|
||||
* positives less likely.
|
||||
*
|
||||
* NOTE
|
||||
*
|
||||
@ -1995,6 +2022,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
if (!bfqd->last_completed_rq_bfqq ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq ||
|
||||
bfq_bfqq_has_short_ttime(bfqq) ||
|
||||
bfqq->dispatched > 0 ||
|
||||
now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
|
||||
bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
|
||||
return;
|
||||
@ -2317,9 +2345,9 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
|
||||
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
|
||||
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
if (free)
|
||||
blk_mq_free_request(free);
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2405,7 +2433,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
|
||||
*next_bfqq = bfq_init_rq(next);
|
||||
|
||||
if (!bfqq)
|
||||
return;
|
||||
goto remove;
|
||||
|
||||
/*
|
||||
* If next and rq belong to the same bfq_queue and next is older
|
||||
@ -2428,6 +2456,14 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
|
||||
bfqq->next_rq = rq;
|
||||
|
||||
bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
|
||||
remove:
|
||||
/* Merged request may be in the IO scheduler. Remove it. */
|
||||
if (!RB_EMPTY_NODE(&next->rb_node)) {
|
||||
bfq_remove_request(next->q, next);
|
||||
if (next_bfqq)
|
||||
bfqg_stats_update_io_remove(bfqq_group(next_bfqq),
|
||||
next->cmd_flags);
|
||||
}
|
||||
}
|
||||
|
||||
/* Must be called with bfqq != NULL */
|
||||
@ -2695,10 +2731,18 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
* costly and complicated.
|
||||
*/
|
||||
if (unlikely(!bfqd->nonrot_with_queueing)) {
|
||||
if (bic->stable_merge_bfqq &&
|
||||
/*
|
||||
* Make sure also that bfqq is sync, because
|
||||
* bic->stable_merge_bfqq may point to some queue (for
|
||||
* stable merging) also if bic is associated with a
|
||||
* sync queue, but this bfqq is async
|
||||
*/
|
||||
if (bfq_bfqq_sync(bfqq) && bic->stable_merge_bfqq &&
|
||||
!bfq_bfqq_just_created(bfqq) &&
|
||||
time_is_after_jiffies(bfqq->split_time +
|
||||
msecs_to_jiffies(200))) {
|
||||
time_is_before_jiffies(bfqq->split_time +
|
||||
msecs_to_jiffies(bfq_late_stable_merging)) &&
|
||||
time_is_before_jiffies(bfqq->creation_time +
|
||||
msecs_to_jiffies(bfq_late_stable_merging))) {
|
||||
struct bfq_queue *stable_merge_bfqq =
|
||||
bic->stable_merge_bfqq;
|
||||
int proc_ref = min(bfqq_process_refs(bfqq),
|
||||
@ -5479,7 +5523,7 @@ static struct bfq_queue *bfq_do_or_sched_stable_merge(struct bfq_data *bfqd,
|
||||
*/
|
||||
if (!last_bfqq_created ||
|
||||
time_before(last_bfqq_created->creation_time +
|
||||
bfqd->bfq_burst_interval,
|
||||
msecs_to_jiffies(bfq_activation_stable_merging),
|
||||
bfqq->creation_time) ||
|
||||
bfqq->entity.parent != last_bfqq_created->entity.parent ||
|
||||
bfqq->ioprio != last_bfqq_created->ioprio ||
|
||||
@ -5925,14 +5969,16 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
struct bfq_queue *bfqq;
|
||||
bool idle_timer_disabled = false;
|
||||
unsigned int cmd_flags;
|
||||
LIST_HEAD(free);
|
||||
|
||||
#ifdef CONFIG_BFQ_GROUP_IOSCHED
|
||||
if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
|
||||
bfqg_stats_update_legacy_io(q, rq);
|
||||
#endif
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
if (blk_mq_sched_try_insert_merge(q, rq)) {
|
||||
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
blk_mq_free_requests(&free);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -6129,11 +6175,13 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
||||
* of other queues. But a false waker will unjustly steal
|
||||
* bandwidth to its supposedly woken queue. So considering
|
||||
* also shared queues in the waking mechanism may cause more
|
||||
* control troubles than throughput benefits. Then do not set
|
||||
* last_completed_rq_bfqq to bfqq if bfqq is a shared queue.
|
||||
* control troubles than throughput benefits. Then reset
|
||||
* last_completed_rq_bfqq if bfqq is a shared queue.
|
||||
*/
|
||||
if (!bfq_bfqq_coop(bfqq))
|
||||
bfqd->last_completed_rq_bfqq = bfqq;
|
||||
else
|
||||
bfqd->last_completed_rq_bfqq = NULL;
|
||||
|
||||
/*
|
||||
* If we are waiting to discover whether the request pattern
|
||||
@ -6376,6 +6424,7 @@ static void bfq_finish_requeue_request(struct request *rq)
|
||||
{
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(rq);
|
||||
struct bfq_data *bfqd;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* rq either is not associated with any icq, or is an already
|
||||
@ -6393,39 +6442,15 @@ static void bfq_finish_requeue_request(struct request *rq)
|
||||
rq->io_start_time_ns,
|
||||
rq->cmd_flags);
|
||||
|
||||
spin_lock_irqsave(&bfqd->lock, flags);
|
||||
if (likely(rq->rq_flags & RQF_STARTED)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&bfqd->lock, flags);
|
||||
|
||||
if (rq == bfqd->waited_rq)
|
||||
bfq_update_inject_limit(bfqd, bfqq);
|
||||
|
||||
bfq_completed_request(bfqq, bfqd);
|
||||
bfq_finish_requeue_request_body(bfqq);
|
||||
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
} else {
|
||||
/*
|
||||
* Request rq may be still/already in the scheduler,
|
||||
* in which case we need to remove it (this should
|
||||
* never happen in case of requeue). And we cannot
|
||||
* defer such a check and removal, to avoid
|
||||
* inconsistencies in the time interval from the end
|
||||
* of this function to the start of the deferred work.
|
||||
* This situation seems to occur only in process
|
||||
* context, as a consequence of a merge. In the
|
||||
* current version of the code, this implies that the
|
||||
* lock is held.
|
||||
*/
|
||||
|
||||
if (!RB_EMPTY_NODE(&rq->rb_node)) {
|
||||
bfq_remove_request(rq->q, rq);
|
||||
bfqg_stats_update_io_remove(bfqq_group(bfqq),
|
||||
rq->cmd_flags);
|
||||
}
|
||||
bfq_finish_requeue_request_body(bfqq);
|
||||
}
|
||||
bfq_finish_requeue_request_body(bfqq);
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
|
||||
/*
|
||||
* Reset private fields. In case of a requeue, this allows
|
||||
|
block/bio.c (13 lines changed)
@ -1375,8 +1375,7 @@ static inline bool bio_remaining_done(struct bio *bio)
|
||||
*
|
||||
* bio_endio() can be called several times on a bio that has been chained
|
||||
* using bio_chain(). The ->bi_end_io() function will only be called the
|
||||
* last time. At this point the BLK_TA_COMPLETE tracing event will be
|
||||
* generated if BIO_TRACE_COMPLETION is set.
|
||||
* last time.
|
||||
**/
|
||||
void bio_endio(struct bio *bio)
|
||||
{
|
||||
@ -1389,6 +1388,11 @@ again:
|
||||
if (bio->bi_bdev)
|
||||
rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
|
||||
|
||||
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
|
||||
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
|
||||
}
|
||||
|
||||
/*
|
||||
* Need to have a real endio function for chained bios, otherwise
|
||||
* various corner cases will break (like stacking block devices that
|
||||
@ -1402,11 +1406,6 @@ again:
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
|
||||
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
|
||||
}
|
||||
|
||||
blk_throtl_bio_endio(bio);
|
||||
/* release cgroup info */
|
||||
bio_uninit(bio);
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/psi.h>
|
||||
#include "blk.h"
|
||||
#include "blk-ioprio.h"
|
||||
|
||||
/*
|
||||
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
|
||||
@ -1183,15 +1184,18 @@ int blkcg_init_queue(struct request_queue *q)
|
||||
if (preloaded)
|
||||
radix_tree_preload_end();
|
||||
|
||||
ret = blk_iolatency_init(q);
|
||||
if (ret)
|
||||
goto err_destroy_all;
|
||||
|
||||
ret = blk_ioprio_init(q);
|
||||
if (ret)
|
||||
goto err_destroy_all;
|
||||
|
||||
ret = blk_throtl_init(q);
|
||||
if (ret)
|
||||
goto err_destroy_all;
|
||||
|
||||
ret = blk_iolatency_init(q);
|
||||
if (ret) {
|
||||
blk_throtl_exit(q);
|
||||
goto err_destroy_all;
|
||||
}
|
||||
return 0;
|
||||
|
||||
err_destroy_all:
|
||||
@ -1217,32 +1221,6 @@ void blkcg_exit_queue(struct request_queue *q)
|
||||
blk_throtl_exit(q);
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot support shared io contexts, as we have no mean to support
|
||||
* two tasks with the same ioc in two different groups without major rework
|
||||
* of the main cic data structures. For now we allow a task to change
|
||||
* its cgroup only if it's the only owner of its ioc.
|
||||
*/
|
||||
static int blkcg_can_attach(struct cgroup_taskset *tset)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct cgroup_subsys_state *dst_css;
|
||||
struct io_context *ioc;
|
||||
int ret = 0;
|
||||
|
||||
/* task_lock() is needed to avoid races with exit_io_context() */
|
||||
cgroup_taskset_for_each(task, dst_css, tset) {
|
||||
task_lock(task);
|
||||
ioc = task->io_context;
|
||||
if (ioc && atomic_read(&ioc->nr_tasks) > 1)
|
||||
ret = -EINVAL;
|
||||
task_unlock(task);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void blkcg_bind(struct cgroup_subsys_state *root_css)
|
||||
{
|
||||
int i;
|
||||
@ -1275,7 +1253,6 @@ struct cgroup_subsys io_cgrp_subsys = {
|
||||
.css_online = blkcg_css_online,
|
||||
.css_offline = blkcg_css_offline,
|
||||
.css_free = blkcg_css_free,
|
||||
.can_attach = blkcg_can_attach,
|
||||
.css_rstat_flush = blkcg_rstat_flush,
|
||||
.bind = blkcg_bind,
|
||||
.dfl_cftypes = blkcg_files,
|
||||
|
@ -599,7 +599,6 @@ fail_q:
|
||||
kmem_cache_free(blk_requestq_cachep, q);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_alloc_queue);
|
||||
|
||||
/**
|
||||
* blk_get_queue - increment the request_queue refcount
|
||||
@ -1086,15 +1085,6 @@ blk_qc_t submit_bio(struct bio *bio)
|
||||
task_io_account_read(bio->bi_iter.bi_size);
|
||||
count_vm_events(PGPGIN, count);
|
||||
}
|
||||
|
||||
if (unlikely(block_dump)) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
|
||||
current->comm, task_pid_nr(current),
|
||||
op_is_write(bio_op(bio)) ? "WRITE" : "READ",
|
||||
(unsigned long long)bio->bi_iter.bi_sector,
|
||||
bio_devname(bio, b), count);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1394,26 +1384,22 @@ void blk_steal_bios(struct bio_list *list, struct request *rq)
|
||||
EXPORT_SYMBOL_GPL(blk_steal_bios);
|
||||
|
||||
/**
|
||||
* blk_update_request - Special helper function for request stacking drivers
|
||||
* blk_update_request - Complete multiple bytes without completing the request
|
||||
* @req: the request being processed
|
||||
* @error: block status code
|
||||
* @nr_bytes: number of bytes to complete @req
|
||||
* @nr_bytes: number of bytes to complete for @req
|
||||
*
|
||||
* Description:
|
||||
* Ends I/O on a number of bytes attached to @req, but doesn't complete
|
||||
* the request structure even if @req doesn't have leftover.
|
||||
* If @req has leftover, sets it up for the next range of segments.
|
||||
*
|
||||
* This special helper function is only for request stacking drivers
|
||||
* (e.g. request-based dm) so that they can handle partial completion.
|
||||
* Actual device drivers should use blk_mq_end_request instead.
|
||||
*
|
||||
* Passing the result of blk_rq_bytes() as @nr_bytes guarantees
|
||||
* %false return from this function.
|
||||
*
|
||||
* Note:
|
||||
* The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
|
||||
* blk_rq_bytes() and in blk_update_request().
|
||||
* The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function
|
||||
* except in the consistency check at the end of this function.
|
||||
*
|
||||
* Return:
|
||||
* %false - this request doesn't have any more data
|
||||
|
@ -219,8 +219,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
||||
unsigned long flags = 0;
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
|
||||
|
||||
blk_account_io_flush(flush_rq);
|
||||
|
||||
/* release the tag's ownership to the req cloned from */
|
||||
spin_lock_irqsave(&fq->mq_flush_lock, flags);
|
||||
|
||||
@ -230,6 +228,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
||||
return;
|
||||
}
|
||||
|
||||
blk_account_io_flush(flush_rq);
|
||||
/*
|
||||
* Flush request has to be marked as IDLE when it is really ended
|
||||
* because its .end_io() is called from timeout code path too for
|
||||
|
block/blk-ioprio.c (new file, 262 lines)
@@ -0,0 +1,262 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Block rq-qos policy for assigning an I/O priority class to requests.
|
||||
*
|
||||
* Using an rq-qos policy for assigning I/O priority class has two advantages
|
||||
* over using the ioprio_set() system call:
|
||||
*
|
||||
* - This policy is cgroup based so it has all the advantages of cgroups.
|
||||
* - While ioprio_set() does not affect page cache writeback I/O, this rq-qos
|
||||
* controller affects page cache writeback I/O for filesystems that support
|
||||
* associating a cgroup with writeback I/O. See also
|
||||
* Documentation/admin-guide/cgroup-v2.rst.
|
||||
*/
|
||||
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include "blk-ioprio.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
/**
|
||||
* enum prio_policy - I/O priority class policy.
|
||||
* @POLICY_NO_CHANGE: (default) do not modify the I/O priority class.
|
||||
* @POLICY_NONE_TO_RT: modify IOPRIO_CLASS_NONE into IOPRIO_CLASS_RT.
|
||||
* @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
|
||||
* IOPRIO_CLASS_BE.
|
||||
* @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
|
||||
*
|
||||
* See also <linux/ioprio.h>.
|
||||
*/
|
||||
enum prio_policy {
|
||||
POLICY_NO_CHANGE = 0,
|
||||
POLICY_NONE_TO_RT = 1,
|
||||
POLICY_RESTRICT_TO_BE = 2,
|
||||
POLICY_ALL_TO_IDLE = 3,
|
||||
};
|
||||
|
||||
static const char *policy_name[] = {
|
||||
[POLICY_NO_CHANGE] = "no-change",
|
||||
[POLICY_NONE_TO_RT] = "none-to-rt",
|
||||
[POLICY_RESTRICT_TO_BE] = "restrict-to-be",
|
||||
[POLICY_ALL_TO_IDLE] = "idle",
|
||||
};
|
||||
|
||||
static struct blkcg_policy ioprio_policy;
|
||||
|
||||
/**
|
||||
* struct ioprio_blkg - Per (cgroup, request queue) data.
|
||||
* @pd: blkg_policy_data structure.
|
||||
*/
|
||||
struct ioprio_blkg {
|
||||
struct blkg_policy_data pd;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ioprio_blkcg - Per cgroup data.
|
||||
* @cpd: blkcg_policy_data structure.
|
||||
* @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>.
|
||||
*/
|
||||
struct ioprio_blkcg {
|
||||
struct blkcg_policy_data cpd;
|
||||
enum prio_policy prio_policy;
|
||||
};
|
||||
|
||||
static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
|
||||
{
|
||||
return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL;
|
||||
}
|
||||
|
||||
static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg)
|
||||
{
|
||||
return container_of(blkcg_to_cpd(blkcg, &ioprio_policy),
|
||||
struct ioprio_blkcg, cpd);
|
||||
}
|
||||
|
||||
static struct ioprio_blkcg *
|
||||
ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
|
||||
{
|
||||
return blkcg_to_ioprio_blkcg(css_to_blkcg(css));
|
||||
}
|
||||
|
||||
static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio)
|
||||
{
|
||||
struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy);
|
||||
|
||||
if (!pd)
|
||||
return NULL;
|
||||
|
||||
return blkcg_to_ioprio_blkcg(pd->blkg->blkcg);
|
||||
}
|
||||
|
||||
static int ioprio_show_prio_policy(struct seq_file *sf, void *v)
|
||||
{
|
||||
struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf));
|
||||
|
||||
seq_printf(sf, "%s\n", policy_name[blkcg->prio_policy]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(of_css(of));
|
||||
int ret;
|
||||
|
||||
if (off != 0)
|
||||
return -EIO;
|
||||
/* kernfs_fop_write_iter() terminates 'buf' with '\0'. */
|
||||
ret = sysfs_match_string(policy_name, buf);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
blkcg->prio_policy = ret;
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *
|
||||
ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg)
|
||||
{
|
||||
struct ioprio_blkg *ioprio_blkg;
|
||||
|
||||
ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp);
|
||||
if (!ioprio_blkg)
|
||||
return NULL;
|
||||
|
||||
return &ioprio_blkg->pd;
|
||||
}
|
||||
|
||||
static void ioprio_free_pd(struct blkg_policy_data *pd)
|
||||
{
|
||||
struct ioprio_blkg *ioprio_blkg = pd_to_ioprio(pd);
|
||||
|
||||
kfree(ioprio_blkg);
|
||||
}
|
||||
|
||||
static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp)
|
||||
{
|
||||
struct ioprio_blkcg *blkcg;
|
||||
|
||||
blkcg = kzalloc(sizeof(*blkcg), gfp);
|
||||
if (!blkcg)
|
||||
return NULL;
|
||||
blkcg->prio_policy = POLICY_NO_CHANGE;
|
||||
return &blkcg->cpd;
|
||||
}
|
||||
|
||||
static void ioprio_free_cpd(struct blkcg_policy_data *cpd)
|
||||
{
|
||||
struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd);
|
||||
|
||||
kfree(blkcg);
|
||||
}
|
||||
|
||||
#define IOPRIO_ATTRS \
|
||||
{ \
|
||||
.name = "prio.class", \
|
||||
.seq_show = ioprio_show_prio_policy, \
|
||||
.write = ioprio_set_prio_policy, \
|
||||
}, \
|
||||
{ } /* sentinel */
|
||||
|
||||
/* cgroup v2 attributes */
|
||||
static struct cftype ioprio_files[] = {
|
||||
IOPRIO_ATTRS
|
||||
};
|
||||
|
||||
/* cgroup v1 attributes */
|
||||
static struct cftype ioprio_legacy_files[] = {
|
||||
IOPRIO_ATTRS
|
||||
};
|
||||
|
||||
static struct blkcg_policy ioprio_policy = {
|
||||
.dfl_cftypes = ioprio_files,
|
||||
.legacy_cftypes = ioprio_legacy_files,
|
||||
|
||||
.cpd_alloc_fn = ioprio_alloc_cpd,
|
||||
.cpd_free_fn = ioprio_free_cpd,
|
||||
|
||||
.pd_alloc_fn = ioprio_alloc_pd,
|
||||
.pd_free_fn = ioprio_free_pd,
|
||||
};
|
||||
|
||||
struct blk_ioprio {
|
||||
struct rq_qos rqos;
|
||||
};
|
||||
|
||||
static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
|
||||
|
||||
/*
|
||||
* Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
|
||||
* correspond to a lower priority. Hence, the max_t() below selects
|
||||
* the lower priority of bi_ioprio and the cgroup I/O priority class.
|
||||
* If the cgroup policy has been set to POLICY_NO_CHANGE == 0, the
|
||||
* bio I/O priority is not modified. If the bio I/O priority equals
|
||||
* IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio.
|
||||
*/
|
||||
bio->bi_ioprio = max_t(u16, bio->bi_ioprio,
|
||||
IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
|
||||
}
|
||||
|
||||
static void blkcg_ioprio_exit(struct rq_qos *rqos)
|
||||
{
|
||||
struct blk_ioprio *blkioprio_blkg =
|
||||
container_of(rqos, typeof(*blkioprio_blkg), rqos);
|
||||
|
||||
blkcg_deactivate_policy(rqos->q, &ioprio_policy);
|
||||
kfree(blkioprio_blkg);
|
||||
}
|
||||
|
||||
static struct rq_qos_ops blkcg_ioprio_ops = {
|
||||
.track = blkcg_ioprio_track,
|
||||
.exit = blkcg_ioprio_exit,
|
||||
};
|
||||
|
||||
int blk_ioprio_init(struct request_queue *q)
|
||||
{
|
||||
struct blk_ioprio *blkioprio_blkg;
|
||||
struct rq_qos *rqos;
|
||||
int ret;
|
||||
|
||||
blkioprio_blkg = kzalloc(sizeof(*blkioprio_blkg), GFP_KERNEL);
|
||||
if (!blkioprio_blkg)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = blkcg_activate_policy(q, &ioprio_policy);
|
||||
if (ret) {
|
||||
kfree(blkioprio_blkg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
rqos = &blkioprio_blkg->rqos;
|
||||
rqos->id = RQ_QOS_IOPRIO;
|
||||
rqos->ops = &blkcg_ioprio_ops;
|
||||
rqos->q = q;
|
||||
|
||||
/*
|
||||
* Registering the rq-qos policy after activating the blk-cgroup
|
||||
* policy guarantees that ioprio_blkcg_from_bio(bio) != NULL in the
|
||||
* rq-qos callbacks.
|
||||
*/
|
||||
rq_qos_add(q, rqos);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init ioprio_init(void)
|
||||
{
|
||||
return blkcg_policy_register(&ioprio_policy);
|
||||
}
|
||||
|
||||
static void __exit ioprio_exit(void)
|
||||
{
|
||||
blkcg_policy_unregister(&ioprio_policy);
|
||||
}
|
||||
|
||||
module_init(ioprio_init);
|
||||
module_exit(ioprio_exit);
|
block/blk-ioprio.h (new file, 19 lines)
@@ -0,0 +1,19 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _BLK_IOPRIO_H_
|
||||
#define _BLK_IOPRIO_H_
|
||||
|
||||
#include <linux/kconfig.h>
|
||||
|
||||
struct request_queue;
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP_IOPRIO
|
||||
int blk_ioprio_init(struct request_queue *q);
|
||||
#else
|
||||
static inline int blk_ioprio_init(struct request_queue *q)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _BLK_IOPRIO_H_ */
|
@ -559,10 +559,14 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
|
||||
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
|
||||
unsigned int nr_phys_segs)
|
||||
{
|
||||
if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
|
||||
if (blk_integrity_merge_bio(req->q, req, bio) == false)
|
||||
goto no_merge;
|
||||
|
||||
if (blk_integrity_merge_bio(req->q, req, bio) == false)
|
||||
/* discard request merge won't add new segment */
|
||||
if (req_op(req) == REQ_OP_DISCARD)
|
||||
return 1;
|
||||
|
||||
if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
|
||||
goto no_merge;
|
||||
|
||||
/*
|
||||
@ -846,18 +850,15 @@ static struct request *attempt_front_merge(struct request_queue *q,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
|
||||
struct request *next)
|
||||
/*
|
||||
* Try to merge 'next' into 'rq'. Return true if the merge happened, false
|
||||
* otherwise. The caller is responsible for freeing 'next' if the merge
|
||||
* happened.
|
||||
*/
|
||||
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
|
||||
struct request *next)
|
||||
{
|
||||
struct request *free;
|
||||
|
||||
free = attempt_merge(q, rq, next);
|
||||
if (free) {
|
||||
blk_put_request(free);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return attempt_merge(q, rq, next);
|
||||
}
|
||||
|
||||
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
|
||||
|
@ -937,6 +937,21 @@ void blk_mq_debugfs_unregister_sched(struct request_queue *q)
|
||||
q->sched_debugfs_dir = NULL;
|
||||
}
|
||||
|
||||
static const char *rq_qos_id_to_name(enum rq_qos_id id)
|
||||
{
|
||||
switch (id) {
|
||||
case RQ_QOS_WBT:
|
||||
return "wbt";
|
||||
case RQ_QOS_LATENCY:
|
||||
return "latency";
|
||||
case RQ_QOS_COST:
|
||||
return "cost";
|
||||
case RQ_QOS_IOPRIO:
|
||||
return "ioprio";
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
|
||||
{
|
||||
debugfs_remove_recursive(rqos->debugfs_dir);
|
||||
|
@ -168,9 +168,19 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
|
||||
* in blk_mq_dispatch_rq_list().
|
||||
*/
|
||||
list_add_tail(&rq->queuelist, &rq_list);
|
||||
count++;
|
||||
if (rq->mq_hctx != hctx)
|
||||
multi_hctxs = true;
|
||||
} while (++count < max_dispatch);
|
||||
|
||||
/*
|
||||
* If we cannot get tag for the request, stop dequeueing
|
||||
* requests from the IO scheduler. We are unlikely to be able
|
||||
* to submit them anyway and it creates false impression for
|
||||
* scheduling heuristics that the device can take more IO.
|
||||
*/
|
||||
if (!blk_mq_get_driver_tag(rq))
|
||||
break;
|
||||
} while (count < max_dispatch);
|
||||
|
||||
if (!count) {
|
||||
if (run_queue)
|
||||
@ -284,8 +294,7 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
|
||||
static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
|
||||
const bool has_sched = q->elevator;
|
||||
int ret = 0;
|
||||
LIST_HEAD(rq_list);
|
||||
|
||||
@ -316,12 +325,12 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
|
||||
if (!list_empty(&rq_list)) {
|
||||
blk_mq_sched_mark_restart_hctx(hctx);
|
||||
if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
|
||||
if (has_sched_dispatch)
|
||||
if (has_sched)
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
else
|
||||
ret = blk_mq_do_dispatch_ctx(hctx);
|
||||
}
|
||||
} else if (has_sched_dispatch) {
|
||||
} else if (has_sched) {
|
||||
ret = blk_mq_do_dispatch_sched(hctx);
|
||||
} else if (hctx->dispatch_busy) {
|
||||
/* dequeue request one by one from sw queue if queue is busy */
|
||||
@ -390,9 +399,10 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
				   struct list_head *free)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
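
With the extra list_head argument, the scheduler no longer frees merged requests while holding its own locks; it collects them and frees them afterwards, which is what breaks the ioc-lock/bfqd-lock inversion this series fixes. A sketch of the new calling convention as an elevator's insert path might use it; blk_mq_free_requests() is the helper added later in this series and sched_lock is a placeholder name:

	LIST_HEAD(free);

	spin_lock(&sched_lock);			/* scheduler-private lock (placeholder) */
	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
		spin_unlock(&sched_lock);
		blk_mq_free_requests(&free);	/* free outside the scheduler lock */
		return;
	}
	/* ... normal insertion path ... */
	spin_unlock(&sched_lock);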
|
||||
|
||||
@ -453,7 +463,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
|
||||
goto run;
|
||||
}
|
||||
|
||||
if (e && e->type->ops.insert_requests) {
|
||||
if (e) {
|
||||
LIST_HEAD(list);
|
||||
|
||||
list_add(&rq->queuelist, &list);
|
||||
@ -484,9 +494,9 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
|
||||
e = hctx->queue->elevator;
|
||||
if (e && e->type->ops.insert_requests)
|
||||
if (e) {
|
||||
e->type->ops.insert_requests(hctx, list, false);
|
||||
else {
|
||||
} else {
|
||||
/*
|
||||
* try to issue requests directly if the hw queue isn't
|
||||
* busy in case of 'none' scheduler, and this way may save
|
||||
@ -509,11 +519,9 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
|
||||
struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
|
||||
if (hctx->sched_tags) {
|
||||
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
|
||||
blk_mq_free_rq_map(hctx->sched_tags, flags);
|
||||
blk_mq_free_rq_map(hctx->sched_tags, set->flags);
|
||||
hctx->sched_tags = NULL;
|
||||
}
|
||||
}
|
||||
@ -523,12 +531,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
/* Clear HCTX_SHARED so tags are init'ed */
|
||||
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
int ret;
|
||||
|
||||
hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
|
||||
set->reserved_tags, flags);
|
||||
set->reserved_tags, set->flags);
|
||||
if (!hctx->sched_tags)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -546,16 +552,50 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
|
||||
int i;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
/* Clear HCTX_SHARED so tags are freed */
|
||||
unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
|
||||
if (hctx->sched_tags) {
|
||||
blk_mq_free_rq_map(hctx->sched_tags, flags);
|
||||
blk_mq_free_rq_map(hctx->sched_tags, hctx->flags);
|
||||
hctx->sched_tags = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
|
||||
{
|
||||
struct blk_mq_tag_set *set = queue->tag_set;
|
||||
int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int ret, i;
|
||||
|
||||
/*
|
||||
* Set initial depth at max so that we don't need to reallocate for
|
||||
* updating nr_requests.
|
||||
*/
|
||||
ret = blk_mq_init_bitmaps(&queue->sched_bitmap_tags,
|
||||
&queue->sched_breserved_tags,
|
||||
MAX_SCHED_RQ, set->reserved_tags,
|
||||
set->numa_node, alloc_policy);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
queue_for_each_hw_ctx(queue, hctx, i) {
|
||||
hctx->sched_tags->bitmap_tags =
|
||||
&queue->sched_bitmap_tags;
|
||||
hctx->sched_tags->breserved_tags =
|
||||
&queue->sched_breserved_tags;
|
||||
}
|
||||
|
||||
sbitmap_queue_resize(&queue->sched_bitmap_tags,
|
||||
queue->nr_requests - set->reserved_tags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
|
||||
{
|
||||
sbitmap_queue_free(&queue->sched_bitmap_tags);
|
||||
sbitmap_queue_free(&queue->sched_breserved_tags);
|
||||
}
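
Sizing the shared scheduler bitmaps at MAX_SCHED_RQ up front and then shrinking them with sbitmap_queue_resize() means a later nr_requests change never has to reallocate; blk_mq_update_nr_requests() further down in this series only repeats the resize. A minimal sketch of that update path under the same assumption:

	/* grow or shrink the effective depth without touching the allocation */
	q->nr_requests = nr;
	if (q->elevator && blk_mq_is_sbitmap_shared(set->flags))
		sbitmap_queue_resize(&q->sched_bitmap_tags,
				     nr - set->reserved_tags);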
|
||||
|
||||
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
@ -580,12 +620,18 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
ret = blk_mq_sched_alloc_tags(q, hctx, i);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto err_free_tags;
|
||||
}
|
||||
|
||||
if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
|
||||
ret = blk_mq_init_sched_shared_sbitmap(q);
|
||||
if (ret)
|
||||
goto err_free_tags;
|
||||
}
|
||||
|
||||
ret = e->ops.init_sched(q, e);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto err_free_sbitmap;
|
||||
|
||||
blk_mq_debugfs_register_sched(q);
|
||||
|
||||
@ -605,7 +651,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
err_free_sbitmap:
|
||||
if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
|
||||
blk_mq_exit_sched_shared_sbitmap(q);
|
||||
err_free_tags:
|
||||
blk_mq_sched_free_requests(q);
|
||||
blk_mq_sched_tags_teardown(q);
|
||||
q->elevator = NULL;
|
||||
@ -631,6 +680,7 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned int i;
|
||||
unsigned int flags = 0;
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
blk_mq_debugfs_unregister_sched_hctx(hctx);
|
||||
@ -638,10 +688,13 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
|
||||
e->type->ops.exit_hctx(hctx, i);
|
||||
hctx->sched_data = NULL;
|
||||
}
|
||||
flags = hctx->flags;
|
||||
}
|
||||
blk_mq_debugfs_unregister_sched(q);
|
||||
if (e->type->ops.exit_sched)
|
||||
e->type->ops.exit_sched(e);
|
||||
blk_mq_sched_tags_teardown(q);
|
||||
if (blk_mq_is_sbitmap_shared(flags))
|
||||
blk_mq_exit_sched_shared_sbitmap(q);
|
||||
q->elevator = NULL;
|
||||
}
|
||||
|
@ -5,13 +5,16 @@
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
#define MAX_SCHED_RQ (16 * BLKDEV_MAX_RQ)
|
||||
|
||||
void blk_mq_sched_assign_ioc(struct request *rq);
|
||||
|
||||
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs, struct request **merged_request);
|
||||
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs);
|
||||
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
|
||||
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
|
||||
struct list_head *free);
|
||||
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx);
|
||||
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/delay.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
/*
|
||||
@ -199,6 +200,20 @@ struct bt_iter_data {
|
||||
bool reserved;
|
||||
};
|
||||
|
||||
static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
|
||||
unsigned int bitnr)
|
||||
{
|
||||
struct request *rq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&tags->lock, flags);
|
||||
rq = tags->rqs[bitnr];
|
||||
if (!rq || !refcount_inc_not_zero(&rq->ref))
|
||||
rq = NULL;
|
||||
spin_unlock_irqrestore(&tags->lock, flags);
|
||||
return rq;
|
||||
}
|
||||
|
||||
static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
{
|
||||
struct bt_iter_data *iter_data = data;
|
||||
@ -206,18 +221,22 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
struct blk_mq_tags *tags = hctx->tags;
|
||||
bool reserved = iter_data->reserved;
|
||||
struct request *rq;
|
||||
bool ret = true;
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
rq = tags->rqs[bitnr];
|
||||
|
||||
/*
|
||||
* We can hit rq == NULL here, because the tagging functions
|
||||
* test and set the bit before assigning ->rqs[].
|
||||
*/
|
||||
if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx)
|
||||
return iter_data->fn(hctx, rq, iter_data->data, reserved);
|
||||
return true;
|
||||
rq = blk_mq_find_and_get_req(tags, bitnr);
|
||||
if (!rq)
|
||||
return true;
|
||||
|
||||
if (rq->q == hctx->queue && rq->mq_hctx == hctx)
|
||||
ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
|
||||
blk_mq_put_rq_ref(rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -264,6 +283,8 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
struct blk_mq_tags *tags = iter_data->tags;
|
||||
bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
|
||||
struct request *rq;
|
||||
bool ret = true;
|
||||
bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
@ -272,16 +293,19 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
* We can hit rq == NULL here, because the tagging functions
|
||||
* test and set the bit before assigning ->rqs[].
|
||||
*/
|
||||
if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
|
||||
if (iter_static_rqs)
|
||||
rq = tags->static_rqs[bitnr];
|
||||
else
|
||||
rq = tags->rqs[bitnr];
|
||||
rq = blk_mq_find_and_get_req(tags, bitnr);
|
||||
if (!rq)
|
||||
return true;
|
||||
if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
|
||||
!blk_mq_request_started(rq))
|
||||
return true;
|
||||
return iter_data->fn(rq, iter_data->data, reserved);
|
||||
|
||||
if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
|
||||
blk_mq_request_started(rq))
|
||||
ret = iter_data->fn(rq, iter_data->data, reserved);
|
||||
if (!iter_static_rqs)
|
||||
blk_mq_put_rq_ref(rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -348,6 +372,9 @@ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
||||
* indicates whether or not @rq is a reserved request. Return
|
||||
* true to continue iterating tags, false to stop.
|
||||
* @priv: Will be passed as second argument to @fn.
|
||||
*
|
||||
* We grab one request reference before calling @fn and release it after
|
||||
* @fn returns.
|
||||
*/
|
||||
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
|
||||
busy_tag_iter_fn *fn, void *priv)
|
||||
@ -445,39 +472,54 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
|
||||
node);
|
||||
}
|
||||
|
||||
int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
|
||||
struct sbitmap_queue *breserved_tags,
|
||||
unsigned int queue_depth, unsigned int reserved,
|
||||
int node, int alloc_policy)
|
||||
{
|
||||
unsigned int depth = queue_depth - reserved;
|
||||
bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
|
||||
|
||||
if (bt_alloc(bitmap_tags, depth, round_robin, node))
|
||||
return -ENOMEM;
|
||||
if (bt_alloc(breserved_tags, reserved, round_robin, node))
|
||||
goto free_bitmap_tags;
|
||||
|
||||
return 0;
|
||||
|
||||
free_bitmap_tags:
|
||||
sbitmap_queue_free(bitmap_tags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
|
||||
int node, int alloc_policy)
|
||||
{
|
||||
unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
|
||||
bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
|
||||
int ret;
|
||||
|
||||
if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node))
|
||||
return -ENOMEM;
|
||||
if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags,
|
||||
round_robin, node))
|
||||
goto free_bitmap_tags;
|
||||
ret = blk_mq_init_bitmaps(&tags->__bitmap_tags,
|
||||
&tags->__breserved_tags,
|
||||
tags->nr_tags, tags->nr_reserved_tags,
|
||||
node, alloc_policy);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
tags->bitmap_tags = &tags->__bitmap_tags;
|
||||
tags->breserved_tags = &tags->__breserved_tags;
|
||||
|
||||
return 0;
|
||||
free_bitmap_tags:
|
||||
sbitmap_queue_free(&tags->__bitmap_tags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
|
||||
int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set)
|
||||
{
|
||||
unsigned int depth = set->queue_depth - set->reserved_tags;
|
||||
int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
|
||||
bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
|
||||
int i, node = set->numa_node;
|
||||
int i, ret;
|
||||
|
||||
if (bt_alloc(&set->__bitmap_tags, depth, round_robin, node))
|
||||
return -ENOMEM;
|
||||
if (bt_alloc(&set->__breserved_tags, set->reserved_tags,
|
||||
round_robin, node))
|
||||
goto free_bitmap_tags;
|
||||
ret = blk_mq_init_bitmaps(&set->__bitmap_tags, &set->__breserved_tags,
|
||||
set->queue_depth, set->reserved_tags,
|
||||
set->numa_node, alloc_policy);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for (i = 0; i < set->nr_hw_queues; i++) {
|
||||
struct blk_mq_tags *tags = set->tags[i];
|
||||
@ -487,9 +529,6 @@ int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
|
||||
}
|
||||
|
||||
return 0;
|
||||
free_bitmap_tags:
|
||||
sbitmap_queue_free(&set->__bitmap_tags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
|
||||
@ -516,6 +555,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
|
||||
|
||||
tags->nr_tags = total_tags;
|
||||
tags->nr_reserved_tags = reserved_tags;
|
||||
spin_lock_init(&tags->lock);
|
||||
|
||||
if (blk_mq_is_sbitmap_shared(flags))
|
||||
return tags;
|
||||
@ -551,8 +591,6 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
|
||||
*/
|
||||
if (tdepth > tags->nr_tags) {
|
||||
struct blk_mq_tag_set *set = hctx->queue->tag_set;
|
||||
/* Only sched tags can grow, so clear HCTX_SHARED flag */
|
||||
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
|
||||
struct blk_mq_tags *new;
|
||||
bool ret;
|
||||
|
||||
@ -563,21 +601,21 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
|
||||
* We need some sort of upper limit, set it high enough that
|
||||
* no valid use cases should require more.
|
||||
*/
|
||||
if (tdepth > 16 * BLKDEV_MAX_RQ)
|
||||
if (tdepth > MAX_SCHED_RQ)
|
||||
return -EINVAL;
|
||||
|
||||
new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
|
||||
tags->nr_reserved_tags, flags);
|
||||
tags->nr_reserved_tags, set->flags);
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
|
||||
if (ret) {
|
||||
blk_mq_free_rq_map(new, flags);
|
||||
blk_mq_free_rq_map(new, set->flags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
|
||||
blk_mq_free_rq_map(*tagsptr, flags);
|
||||
blk_mq_free_rq_map(*tagsptr, set->flags);
|
||||
*tagsptr = new;
|
||||
} else {
|
||||
/*
|
||||
|
@ -20,17 +20,26 @@ struct blk_mq_tags {
|
||||
struct request **rqs;
|
||||
struct request **static_rqs;
|
||||
struct list_head page_list;
|
||||
|
||||
/*
|
||||
* used to clear request reference in rqs[] before freeing one
|
||||
* request pool
|
||||
*/
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
|
||||
unsigned int reserved_tags,
|
||||
int node, unsigned int flags);
|
||||
extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags);
|
||||
extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
|
||||
struct sbitmap_queue *breserved_tags,
|
||||
unsigned int queue_depth,
|
||||
unsigned int reserved,
|
||||
int node, int alloc_policy);
|
||||
|
||||
extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set,
|
||||
unsigned int flags);
|
||||
extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set);
|
||||
extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set);
|
||||
|
||||
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
|
||||
extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
|
||||
unsigned int tag);
|
||||
|
206
block/blk-mq.c
@ -909,6 +909,14 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
|
||||
return false;
|
||||
}
|
||||
|
||||
void blk_mq_put_rq_ref(struct request *rq)
|
||||
{
|
||||
if (is_flush_rq(rq, rq->mq_hctx))
|
||||
rq->end_io(rq, 0);
|
||||
else if (refcount_dec_and_test(&rq->ref))
|
||||
__blk_mq_free_request(rq);
|
||||
}
|
||||
|
||||
static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq, void *priv, bool reserved)
|
||||
{
|
||||
@ -942,11 +950,7 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
|
||||
if (blk_mq_req_expired(rq, next))
|
||||
blk_mq_rq_timed_out(rq, reserved);
|
||||
|
||||
if (is_flush_rq(rq, hctx))
|
||||
rq->end_io(rq, 0);
|
||||
else if (refcount_dec_and_test(&rq->ref))
|
||||
__blk_mq_free_request(rq);
|
||||
|
||||
blk_mq_put_rq_ref(rq);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1100,7 +1104,7 @@ static bool __blk_mq_get_driver_tag(struct request *rq)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool blk_mq_get_driver_tag(struct request *rq)
|
||||
bool blk_mq_get_driver_tag(struct request *rq)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
|
||||
|
||||
@ -1220,9 +1224,6 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
|
||||
{
|
||||
unsigned int ewma;
|
||||
|
||||
if (hctx->queue->elevator)
|
||||
return;
|
||||
|
||||
ewma = hctx->dispatch_busy;
|
||||
|
||||
if (!ewma && !busy)
|
||||
@ -2303,6 +2304,45 @@ queue_exit:
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static size_t order_to_size(unsigned int order)
|
||||
{
|
||||
return (size_t)PAGE_SIZE << order;
|
||||
}
|
||||
|
||||
/* called before freeing request pool in @tags */
static void blk_mq_clear_rq_mapping(struct blk_mq_tag_set *set,
		struct blk_mq_tags *tags, unsigned int hctx_idx)
{
	struct blk_mq_tags *drv_tags = set->tags[hctx_idx];
	struct page *page;
	unsigned long flags;

	list_for_each_entry(page, &tags->page_list, lru) {
		unsigned long start = (unsigned long)page_address(page);
		unsigned long end = start + order_to_size(page->private);
		int i;

		for (i = 0; i < set->queue_depth; i++) {
			struct request *rq = drv_tags->rqs[i];
			unsigned long rq_addr = (unsigned long)rq;

			if (rq_addr >= start && rq_addr < end) {
				WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
				cmpxchg(&drv_tags->rqs[i], rq, NULL);
			}
		}
	}

	/*
	 * Wait until all pending iteration is done.
	 *
	 * Request reference is cleared and it is guaranteed to be observed
	 * after the ->lock is released.
	 */
	spin_lock_irqsave(&drv_tags->lock, flags);
	spin_unlock_irqrestore(&drv_tags->lock, flags);
}
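
The empty lock/unlock pair above acts as a barrier against the tag iterators introduced earlier in this series rather than protecting any data inside the critical section. A comment-level sketch of the pairing, stated as an assumption rather than verbatim kernel text:

/*
 * 1. Walkers (blk_mq_find_and_get_req) take tags->lock, read rqs[bitnr] and
 *    try refcount_inc_not_zero() before ever dereferencing the request.
 * 2. This path first NULLs the stale rqs[] slots with cmpxchg(), then takes
 *    and drops the same lock once.
 * 3. After the unlock, every walker has either seen NULL, failed the refcount
 *    bump, or finished its callback, so the request pages can be freed safely.
 */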
|
||||
|
||||
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
@ -2321,6 +2361,8 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
|
||||
}
|
||||
}
|
||||
|
||||
blk_mq_clear_rq_mapping(set, tags, hctx_idx);
|
||||
|
||||
while (!list_empty(&tags->page_list)) {
|
||||
page = list_first_entry(&tags->page_list, struct page, lru);
|
||||
list_del_init(&page->lru);
|
||||
@ -2380,11 +2422,6 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
|
||||
return tags;
|
||||
}
|
||||
|
||||
static size_t order_to_size(unsigned int order)
|
||||
{
|
||||
return (size_t)PAGE_SIZE << order;
|
||||
}
|
||||
|
||||
static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
|
||||
unsigned int hctx_idx, int node)
|
||||
{
|
||||
@ -2603,16 +2640,49 @@ static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
|
||||
&hctx->cpuhp_dead);
|
||||
}
|
||||
|
||||
/*
|
||||
* Before freeing hw queue, clearing the flush request reference in
|
||||
* tags->rqs[] for avoiding potential UAF.
|
||||
*/
|
||||
static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
|
||||
unsigned int queue_depth, struct request *flush_rq)
|
||||
{
|
||||
int i;
|
||||
unsigned long flags;
|
||||
|
||||
/* The hw queue may not be mapped yet */
|
||||
if (!tags)
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
|
||||
|
||||
for (i = 0; i < queue_depth; i++)
|
||||
cmpxchg(&tags->rqs[i], flush_rq, NULL);
|
||||
|
||||
/*
|
||||
* Wait until all pending iteration is done.
|
||||
*
|
||||
* Request reference is cleared and it is guaranteed to be observed
|
||||
* after the ->lock is released.
|
||||
*/
|
||||
spin_lock_irqsave(&tags->lock, flags);
|
||||
spin_unlock_irqrestore(&tags->lock, flags);
|
||||
}
|
||||
|
||||
/* hctx->ctxs will be freed in queue's release handler */
|
||||
static void blk_mq_exit_hctx(struct request_queue *q,
|
||||
struct blk_mq_tag_set *set,
|
||||
struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
|
||||
{
|
||||
struct request *flush_rq = hctx->fq->flush_rq;
|
||||
|
||||
if (blk_mq_hw_queue_mapped(hctx))
|
||||
blk_mq_tag_idle(hctx);
|
||||
|
||||
blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
|
||||
set->queue_depth, flush_rq);
|
||||
if (set->ops->exit_request)
|
||||
set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
|
||||
set->ops->exit_request(set, flush_rq, hctx_idx);
|
||||
|
||||
if (set->ops->exit_hctx)
|
||||
set->ops->exit_hctx(hctx, hctx_idx);
|
||||
@ -3042,21 +3112,18 @@ void blk_mq_release(struct request_queue *q)
|
||||
struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
|
||||
void *queuedata)
|
||||
{
|
||||
struct request_queue *uninit_q, *q;
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
|
||||
uninit_q = blk_alloc_queue(set->numa_node);
|
||||
if (!uninit_q)
|
||||
q = blk_alloc_queue(set->numa_node);
|
||||
if (!q)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
uninit_q->queuedata = queuedata;
|
||||
|
||||
/*
|
||||
* Initialize the queue without an elevator. device_add_disk() will do
|
||||
* the initialization.
|
||||
*/
|
||||
q = blk_mq_init_allocated_queue(set, uninit_q, false);
|
||||
if (IS_ERR(q))
|
||||
blk_cleanup_queue(uninit_q);
|
||||
|
||||
q->queuedata = queuedata;
|
||||
ret = blk_mq_init_allocated_queue(set, q);
|
||||
if (ret) {
|
||||
blk_cleanup_queue(q);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
return q;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
|
||||
@ -3067,39 +3134,24 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_init_queue);
|
||||
|
||||
/*
|
||||
* Helper for setting up a queue with mq ops, given queue depth, and
|
||||
* the passed in mq ops flags.
|
||||
*/
|
||||
struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
|
||||
const struct blk_mq_ops *ops,
|
||||
unsigned int queue_depth,
|
||||
unsigned int set_flags)
|
||||
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
struct gendisk *disk;
|
||||
|
||||
memset(set, 0, sizeof(*set));
|
||||
set->ops = ops;
|
||||
set->nr_hw_queues = 1;
|
||||
set->nr_maps = 1;
|
||||
set->queue_depth = queue_depth;
|
||||
set->numa_node = NUMA_NO_NODE;
|
||||
set->flags = set_flags;
|
||||
q = blk_mq_init_queue_data(set, queuedata);
|
||||
if (IS_ERR(q))
|
||||
return ERR_CAST(q);
|
||||
|
||||
ret = blk_mq_alloc_tag_set(set);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
q = blk_mq_init_queue(set);
|
||||
if (IS_ERR(q)) {
|
||||
blk_mq_free_tag_set(set);
|
||||
return q;
|
||||
disk = __alloc_disk_node(0, set->numa_node);
|
||||
if (!disk) {
|
||||
blk_cleanup_queue(q);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
return q;
|
||||
disk->queue = q;
|
||||
return disk;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_init_sq_queue);
|
||||
EXPORT_SYMBOL(__blk_mq_alloc_disk);
|
||||
|
||||
static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
|
||||
struct blk_mq_tag_set *set, struct request_queue *q,
|
||||
@ -3212,9 +3264,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
}
|
||||
|
||||
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
struct request_queue *q,
|
||||
bool elevator_init)
|
||||
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
struct request_queue *q)
|
||||
{
|
||||
/* mark the queue as mq asap */
|
||||
q->mq_ops = set->ops;
|
||||
@ -3264,11 +3315,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
|
||||
blk_mq_add_queue_tag_set(set, q);
|
||||
blk_mq_map_swqueue(q);
|
||||
|
||||
if (elevator_init)
|
||||
elevator_init_mq(q);
|
||||
|
||||
return q;
|
||||
return 0;
|
||||
|
||||
err_hctxs:
|
||||
kfree(q->queue_hw_ctx);
|
||||
@ -3279,7 +3326,7 @@ err_poll:
|
||||
q->poll_cb = NULL;
|
||||
err_exit:
|
||||
q->mq_ops = NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return -ENOMEM;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_init_allocated_queue);
|
||||
|
||||
@ -3491,7 +3538,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
||||
if (blk_mq_is_sbitmap_shared(set->flags)) {
|
||||
atomic_set(&set->active_queues_shared_sbitmap, 0);
|
||||
|
||||
if (blk_mq_init_shared_sbitmap(set, set->flags)) {
|
||||
if (blk_mq_init_shared_sbitmap(set)) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_mq_rq_maps;
|
||||
}
|
||||
@ -3516,6 +3563,22 @@ out_free_mq_map:
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_alloc_tag_set);

/* allocate and initialize a tagset for a simple single-queue device */
int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
		const struct blk_mq_ops *ops, unsigned int queue_depth,
		unsigned int set_flags)
{
	memset(set, 0, sizeof(*set));
	set->ops = ops;
	set->nr_hw_queues = 1;
	set->nr_maps = 1;
	set->queue_depth = queue_depth;
	set->numa_node = NUMA_NO_NODE;
	set->flags = set_flags;
	return blk_mq_alloc_tag_set(set);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_sq_tag_set);
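
Taken together with __blk_mq_alloc_disk() earlier in this file, this gives simple drivers a two-call setup path. A driver-side sketch using only the signatures visible in this diff; example_mq_ops, the queue depth and the trimmed teardown details are placeholders, and in-tree users typically go through wrapper helpers:

static struct blk_mq_tag_set example_set;	/* placeholder driver state */

static int example_probe(void)
{
	struct gendisk *disk;
	int ret;

	ret = blk_mq_alloc_sq_tag_set(&example_set, &example_mq_ops, 64,
				      BLK_MQ_F_SHOULD_MERGE);
	if (ret)
		return ret;

	disk = __blk_mq_alloc_disk(&example_set, NULL);
	if (IS_ERR(disk)) {
		blk_mq_free_tag_set(&example_set);
		return PTR_ERR(disk);
	}

	/* set disk->fops, disk->private_data, capacity, then add the disk */
	return 0;
}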
|
||||
|
||||
void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
|
||||
{
|
||||
int i, j;
|
||||
@ -3567,15 +3630,24 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
|
||||
} else {
|
||||
ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
|
||||
nr, true);
|
||||
if (blk_mq_is_sbitmap_shared(set->flags)) {
|
||||
hctx->sched_tags->bitmap_tags =
|
||||
&q->sched_bitmap_tags;
|
||||
hctx->sched_tags->breserved_tags =
|
||||
&q->sched_breserved_tags;
|
||||
}
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
if (q->elevator && q->elevator->type->ops.depth_updated)
|
||||
q->elevator->type->ops.depth_updated(hctx);
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
q->nr_requests = nr;
|
||||
if (q->elevator && blk_mq_is_sbitmap_shared(set->flags))
|
||||
sbitmap_queue_resize(&q->sched_bitmap_tags,
|
||||
nr - set->reserved_tags);
|
||||
}
|
||||
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
@ -47,6 +47,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
|
||||
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
|
||||
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *start);
|
||||
void blk_mq_put_rq_ref(struct request *rq);
|
||||
|
||||
/*
|
||||
* Internal helpers for allocating/freeing the request map
|
||||
@ -259,6 +260,8 @@ static inline void blk_mq_put_driver_tag(struct request *rq)
|
||||
__blk_mq_put_driver_tag(rq->mq_hctx, rq);
|
||||
}
|
||||
|
||||
bool blk_mq_get_driver_tag(struct request *rq);
|
||||
|
||||
static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
|
||||
{
|
||||
int cpu;
|
||||
@ -299,6 +302,17 @@ static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Free all requests on the list */
|
||||
static inline void blk_mq_free_requests(struct list_head *list)
|
||||
{
|
||||
while (!list_empty(list)) {
|
||||
struct request *rq = list_entry_rq(list->next);
|
||||
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_free_request(rq);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For shared tag users, we track the number of currently active users
|
||||
* and attempt to provide a fair share of the tag depth for each of them.
|
||||
|
@ -266,8 +266,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
|
||||
if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
|
||||
return;
|
||||
|
||||
prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
|
||||
has_sleeper = !wq_has_single_sleeper(&rqw->wait);
|
||||
has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
do {
|
||||
/* The memory barrier in set_task_state saves us here. */
|
||||
if (data.got_token)
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include "blk-mq-debugfs.h"
|
||||
|
||||
@ -16,6 +17,7 @@ enum rq_qos_id {
|
||||
RQ_QOS_WBT,
|
||||
RQ_QOS_LATENCY,
|
||||
RQ_QOS_COST,
|
||||
RQ_QOS_IOPRIO,
|
||||
};
|
||||
|
||||
struct rq_wait {
|
||||
@ -78,19 +80,6 @@ static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
|
||||
return rq_qos_id(q, RQ_QOS_LATENCY);
|
||||
}
|
||||
|
||||
static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
|
||||
{
|
||||
switch (id) {
|
||||
case RQ_QOS_WBT:
|
||||
return "wbt";
|
||||
case RQ_QOS_LATENCY:
|
||||
return "latency";
|
||||
case RQ_QOS_COST:
|
||||
return "cost";
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
static inline void rq_wait_init(struct rq_wait *rq_wait)
|
||||
{
|
||||
atomic_set(&rq_wait->inflight, 0);
|
||||
@ -99,8 +88,21 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
|
||||
|
||||
static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
|
||||
{
|
||||
/*
|
||||
* No IO can be in-flight when adding rqos, so freeze queue, which
|
||||
* is fine since we only support rq_qos for blk-mq queue.
|
||||
*
|
||||
* Reuse ->queue_lock for protecting against other concurrent
|
||||
* rq_qos adding/deleting
|
||||
*/
|
||||
blk_mq_freeze_queue(q);
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
rqos->next = q->rq_qos;
|
||||
q->rq_qos = rqos;
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
if (rqos->ops->debugfs_attrs)
|
||||
blk_mq_debugfs_register_rqos(rqos);
|
||||
@ -110,12 +112,22 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
|
||||
{
|
||||
struct rq_qos **cur;
|
||||
|
||||
/*
|
||||
* See comment in rq_qos_add() about freezing queue & using
|
||||
* ->queue_lock.
|
||||
*/
|
||||
blk_mq_freeze_queue(q);
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
|
||||
if (*cur == rqos) {
|
||||
*cur = rqos->next;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
blk_mq_debugfs_unregister_rqos(rqos);
|
||||
}
|
||||
|
@ -91,7 +91,7 @@ static ssize_t queue_ra_show(struct request_queue *q, char *page)
|
||||
unsigned long ra_kb = q->backing_dev_info->ra_pages <<
|
||||
(PAGE_SHIFT - 10);
|
||||
|
||||
return queue_var_show(ra_kb, (page));
|
||||
return queue_var_show(ra_kb, page);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
@ -112,28 +112,28 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
|
||||
{
|
||||
int max_sectors_kb = queue_max_sectors(q) >> 1;
|
||||
|
||||
return queue_var_show(max_sectors_kb, (page));
|
||||
return queue_var_show(max_sectors_kb, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_max_segments(q), (page));
|
||||
return queue_var_show(queue_max_segments(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_max_discard_segments_show(struct request_queue *q,
|
||||
char *page)
|
||||
{
|
||||
return queue_var_show(queue_max_discard_segments(q), (page));
|
||||
return queue_var_show(queue_max_discard_segments(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(q->limits.max_integrity_segments, (page));
|
||||
return queue_var_show(q->limits.max_integrity_segments, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_max_segment_size(q), (page));
|
||||
return queue_var_show(queue_max_segment_size(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
|
||||
@ -261,12 +261,12 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
|
||||
{
|
||||
int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
|
||||
|
||||
return queue_var_show(max_hw_sectors_kb, (page));
|
||||
return queue_var_show(max_hw_sectors_kb, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(q->limits.virt_boundary_mask, (page));
|
||||
return queue_var_show(q->limits.virt_boundary_mask, page);
|
||||
}
|
||||
|
||||
#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
|
||||
@ -866,20 +866,6 @@ int blk_register_queue(struct gendisk *disk)
|
||||
"%s is registering an already registered queue\n",
|
||||
kobject_name(&dev->kobj));
|
||||
|
||||
/*
|
||||
* SCSI probing may synchronously create and destroy a lot of
|
||||
* request_queues for non-existent devices. Shutting down a fully
|
||||
* functional queue takes measureable wallclock time as RCU grace
|
||||
* periods are involved. To avoid excessive latency in these
|
||||
* cases, a request_queue starts out in a degraded mode which is
|
||||
* faster to shut down and is made fully functional here as
|
||||
* request_queues for non-existent devices never get registered.
|
||||
*/
|
||||
if (!blk_queue_init_done(q)) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
|
||||
percpu_ref_switch_to_percpu(&q->q_usage_counter);
|
||||
}
|
||||
|
||||
blk_queue_update_readahead(q);
|
||||
|
||||
ret = blk_trace_init_sysfs(dev);
|
||||
@ -938,6 +924,21 @@ int blk_register_queue(struct gendisk *disk)
|
||||
ret = 0;
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
|
||||
/*
|
||||
* SCSI probing may synchronously create and destroy a lot of
|
||||
* request_queues for non-existent devices. Shutting down a fully
|
||||
* functional queue takes measureable wallclock time as RCU grace
|
||||
* periods are involved. To avoid excessive latency in these
|
||||
* cases, a request_queue starts out in a degraded mode which is
|
||||
* faster to shut down and is made fully functional here as
|
||||
* request_queues for non-existent devices never get registered.
|
||||
*/
|
||||
if (!blk_queue_init_done(q)) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
|
||||
percpu_ref_switch_to_percpu(&q->q_usage_counter);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_register_queue);
|
||||
|
@ -77,7 +77,8 @@ enum {

static inline bool rwb_enabled(struct rq_wb *rwb)
{
	return rwb && rwb->wb_normal != 0;
	return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
	       rwb->wb_normal != 0;
}
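
The predicate now treats WBT_STATE_OFF_DEFAULT as disabled even if wb_normal ends up holding a non-zero value again, which helps prevent the false positives named in the merge log. A self-contained restatement, with enum values assumed to mirror blk-wbt.h:

enum { WBT_STATE_ON_DEFAULT = 1, WBT_STATE_ON_MANUAL = 2, WBT_STATE_OFF_DEFAULT = 3 };

static inline bool example_rwb_enabled(int enable_state, unsigned int wb_normal)
{
	/* OFF_DEFAULT short-circuits regardless of what wb_normal says */
	return enable_state != WBT_STATE_OFF_DEFAULT && wb_normal != 0;
}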
|
||||
|
||||
static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
|
||||
@ -563,7 +564,6 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the IO request should be accounted, false if not.
|
||||
* May sleep, if we have exceeded the writeback limits. Caller can pass
|
||||
* in an irq held spinlock, if it holds one when calling this function.
|
||||
* If we do sleep, we'll release and re-grab it.
|
||||
@ -636,9 +636,13 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
|
||||
void wbt_enable_default(struct request_queue *q)
|
||||
{
|
||||
struct rq_qos *rqos = wbt_rq_qos(q);
|
||||
|
||||
/* Throttling already enabled? */
|
||||
if (rqos)
|
||||
if (rqos) {
|
||||
if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
|
||||
RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Queue not registered? Maybe shutting down... */
|
||||
if (!blk_queue_registered(q))
|
||||
@ -702,7 +706,7 @@ void wbt_disable_default(struct request_queue *q)
|
||||
rwb = RQWB(rqos);
|
||||
if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
|
||||
blk_stat_deactivate(rwb->cb);
|
||||
rwb->wb_normal = 0;
|
||||
rwb->enable_state = WBT_STATE_OFF_DEFAULT;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(wbt_disable_default);
|
||||
|
@ -34,6 +34,7 @@ enum {
|
||||
enum {
|
||||
WBT_STATE_ON_DEFAULT = 1,
|
||||
WBT_STATE_ON_MANUAL = 2,
|
||||
WBT_STATE_OFF_DEFAULT
|
||||
};
|
||||
|
||||
struct rq_wb {
|
||||
|
17
block/blk.h
@ -192,7 +192,6 @@ void blk_account_io_done(struct request *req, u64 now);
|
||||
|
||||
void blk_insert_flush(struct request *rq);
|
||||
|
||||
void elevator_init_mq(struct request_queue *q);
|
||||
int elevator_switch_mq(struct request_queue *q,
|
||||
struct elevator_type *new_e);
|
||||
void __elevator_exit(struct request_queue *, struct elevator_queue *);
|
||||
@ -225,7 +224,7 @@ ssize_t part_timeout_store(struct device *, struct device_attribute *,
|
||||
void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
|
||||
int ll_back_merge_fn(struct request *req, struct bio *bio,
|
||||
unsigned int nr_segs);
|
||||
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
|
||||
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
|
||||
struct request *next);
|
||||
unsigned int blk_recalc_rq_segments(struct request *rq);
|
||||
void blk_rq_set_mixed_merge(struct request *rq);
|
||||
@ -343,8 +342,8 @@ static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
|
||||
static inline void blk_queue_clear_zone_settings(struct request_queue *q) {}
|
||||
#endif
|
||||
|
||||
int blk_alloc_devt(struct block_device *part, dev_t *devt);
|
||||
void blk_free_devt(dev_t devt);
|
||||
int blk_alloc_ext_minor(void);
|
||||
void blk_free_ext_minor(unsigned int minor);
|
||||
char *disk_name(struct gendisk *hd, int partno, char *buf);
|
||||
#define ADDPART_FLAG_NONE 0
|
||||
#define ADDPART_FLAG_RAID 1
|
||||
@ -359,4 +358,14 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
unsigned int max_sectors, bool *same_page);
|
||||
|
||||
struct request_queue *blk_alloc_queue(int node_id);
|
||||
|
||||
void disk_alloc_events(struct gendisk *disk);
|
||||
void disk_add_events(struct gendisk *disk);
|
||||
void disk_del_events(struct gendisk *disk);
|
||||
void disk_release_events(struct gendisk *disk);
|
||||
extern struct device_attribute dev_attr_events;
|
||||
extern struct device_attribute dev_attr_events_async;
|
||||
extern struct device_attribute dev_attr_events_poll_msecs;
|
||||
|
||||
#endif /* BLK_INTERNAL_H */
|
||||
|
469
block/disk-events.c
Normal file
@ -0,0 +1,469 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Disk events - monitor disk events like media change and eject request.
|
||||
*/
|
||||
#include <linux/export.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/genhd.h>
|
||||
#include "blk.h"
|
||||
|
||||
struct disk_events {
|
||||
struct list_head node; /* all disk_event's */
|
||||
struct gendisk *disk; /* the associated disk */
|
||||
spinlock_t lock;
|
||||
|
||||
struct mutex block_mutex; /* protects blocking */
|
||||
int block; /* event blocking depth */
|
||||
unsigned int pending; /* events already sent out */
|
||||
unsigned int clearing; /* events being cleared */
|
||||
|
||||
long poll_msecs; /* interval, -1 for default */
|
||||
struct delayed_work dwork;
|
||||
};
|
||||
|
||||
static const char *disk_events_strs[] = {
|
||||
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
|
||||
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
|
||||
};
|
||||
|
||||
static char *disk_uevents[] = {
|
||||
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
|
||||
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
|
||||
};
|
||||
|
||||
/* list of all disk_events */
|
||||
static DEFINE_MUTEX(disk_events_mutex);
|
||||
static LIST_HEAD(disk_events);
|
||||
|
||||
/* disable in-kernel polling by default */
|
||||
static unsigned long disk_events_dfl_poll_msecs;
|
||||
|
||||
static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
long intv_msecs = 0;
|
||||
|
||||
/*
|
||||
* If device-specific poll interval is set, always use it. If
|
||||
* the default is being used, poll if the POLL flag is set.
|
||||
*/
|
||||
if (ev->poll_msecs >= 0)
|
||||
intv_msecs = ev->poll_msecs;
|
||||
else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
|
||||
intv_msecs = disk_events_dfl_poll_msecs;
|
||||
|
||||
return msecs_to_jiffies(intv_msecs);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_block_events - block and flush disk event checking
|
||||
* @disk: disk to block events for
|
||||
*
|
||||
* On return from this function, it is guaranteed that event checking
|
||||
* isn't in progress and won't happen until unblocked by
|
||||
* disk_unblock_events(). Events blocking is counted and the actual
|
||||
* unblocking happens after the matching number of unblocks are done.
|
||||
*
|
||||
* Note that this intentionally does not block event checking from
|
||||
* disk_clear_events().
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep.
|
||||
*/
|
||||
void disk_block_events(struct gendisk *disk)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
unsigned long flags;
|
||||
bool cancel;
|
||||
|
||||
if (!ev)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Outer mutex ensures that the first blocker completes canceling
|
||||
* the event work before further blockers are allowed to finish.
|
||||
*/
|
||||
mutex_lock(&ev->block_mutex);
|
||||
|
||||
spin_lock_irqsave(&ev->lock, flags);
|
||||
cancel = !ev->block++;
|
||||
spin_unlock_irqrestore(&ev->lock, flags);
|
||||
|
||||
if (cancel)
|
||||
cancel_delayed_work_sync(&disk->ev->dwork);
|
||||
|
||||
mutex_unlock(&ev->block_mutex);
|
||||
}
|
||||
|
||||
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
unsigned long intv;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ev->lock, flags);
|
||||
|
||||
if (WARN_ON_ONCE(ev->block <= 0))
|
||||
goto out_unlock;
|
||||
|
||||
if (--ev->block)
|
||||
goto out_unlock;
|
||||
|
||||
intv = disk_events_poll_jiffies(disk);
|
||||
if (check_now)
|
||||
queue_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, 0);
|
||||
else if (intv)
|
||||
queue_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, intv);
|
||||
out_unlock:
|
||||
spin_unlock_irqrestore(&ev->lock, flags);
|
||||
}
|
||||
|
||||
/**
 * disk_unblock_events - unblock disk event checking
 * @disk: disk to unblock events for
 *
 * Undo disk_block_events(). When the block count reaches zero, it
 * starts events polling if configured.
 *
 * CONTEXT:
 * Don't care. Safe to call from irq context.
 */
void disk_unblock_events(struct gendisk *disk)
{
	if (disk->ev)
		__disk_unblock_events(disk, false);
}
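
A typical block/unblock pairing, following the guarantees spelled out in the kerneldoc above; the sketch assumes the caller owns a fully initialized gendisk:

	disk_block_events(disk);	/* cancels the polling work and bumps ev->block */

	/*
	 * ... do work that must not race with event checking, e.g. update
	 * ev->poll_msecs or swap media state ...
	 */

	disk_unblock_events(disk);	/* reschedules polling once the count drops to 0 */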
|
||||
|
||||
/**
|
||||
* disk_flush_events - schedule immediate event checking and flushing
|
||||
* @disk: disk to check and flush events for
|
||||
* @mask: events to flush
|
||||
*
|
||||
* Schedule immediate event checking on @disk if not blocked. Events in
|
||||
* @mask are scheduled to be cleared from the driver. Note that this
|
||||
* doesn't clear the events from @disk->ev.
|
||||
*
|
||||
* CONTEXT:
|
||||
* If @mask is non-zero must be called with disk->open_mutex held.
|
||||
*/
|
||||
void disk_flush_events(struct gendisk *disk, unsigned int mask)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
|
||||
if (!ev)
|
||||
return;
|
||||
|
||||
spin_lock_irq(&ev->lock);
|
||||
ev->clearing |= mask;
|
||||
if (!ev->block)
|
||||
mod_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, 0);
|
||||
spin_unlock_irq(&ev->lock);
|
||||
}
|
||||
|
||||
static void disk_check_events(struct disk_events *ev,
|
||||
unsigned int *clearing_ptr)
|
||||
{
|
||||
struct gendisk *disk = ev->disk;
|
||||
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
|
||||
unsigned int clearing = *clearing_ptr;
|
||||
unsigned int events;
|
||||
unsigned long intv;
|
||||
int nr_events = 0, i;
|
||||
|
||||
/* check events */
|
||||
events = disk->fops->check_events(disk, clearing);
|
||||
|
||||
/* accumulate pending events and schedule next poll if necessary */
|
||||
spin_lock_irq(&ev->lock);
|
||||
|
||||
events &= ~ev->pending;
|
||||
ev->pending |= events;
|
||||
*clearing_ptr &= ~clearing;
|
||||
|
||||
intv = disk_events_poll_jiffies(disk);
|
||||
if (!ev->block && intv)
|
||||
queue_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, intv);
|
||||
|
||||
spin_unlock_irq(&ev->lock);
|
||||
|
||||
/*
|
||||
* Tell userland about new events. Only the events listed in
|
||||
* @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
|
||||
* is set. Otherwise, events are processed internally but never
|
||||
* get reported to userland.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
|
||||
if ((events & disk->events & (1 << i)) &&
|
||||
(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
|
||||
envp[nr_events++] = disk_uevents[i];
|
||||
|
||||
if (nr_events)
|
||||
kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_clear_events - synchronously check, clear and return pending events
|
||||
* @disk: disk to fetch and clear events from
|
||||
* @mask: mask of events to be fetched and cleared
|
||||
*
|
||||
* Disk events are synchronously checked and pending events in @mask
|
||||
* are cleared and returned. This ignores the block count.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep.
|
||||
*/
|
||||
static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
unsigned int pending;
|
||||
unsigned int clearing = mask;
|
||||
|
||||
if (!ev)
|
||||
return 0;
|
||||
|
||||
disk_block_events(disk);
|
||||
|
||||
/*
|
||||
* store the union of mask and ev->clearing on the stack so that the
|
||||
* race with disk_flush_events does not cause ambiguity (ev->clearing
|
||||
* can still be modified even if events are blocked).
|
||||
*/
|
||||
spin_lock_irq(&ev->lock);
|
||||
clearing |= ev->clearing;
|
||||
ev->clearing = 0;
|
||||
spin_unlock_irq(&ev->lock);
|
||||
|
||||
disk_check_events(ev, &clearing);
|
||||
/*
|
||||
* if ev->clearing is not 0, the disk_flush_events got called in the
|
||||
* middle of this function, so we want to run the workfn without delay.
|
||||
*/
|
||||
__disk_unblock_events(disk, ev->clearing ? true : false);
|
||||
|
||||
/* then, fetch and clear pending events */
|
||||
spin_lock_irq(&ev->lock);
|
||||
pending = ev->pending & mask;
|
||||
ev->pending &= ~mask;
|
||||
spin_unlock_irq(&ev->lock);
|
||||
WARN_ON_ONCE(clearing & mask);
|
||||
|
||||
return pending;
|
||||
}
|
||||
|
||||
/**
 * bdev_check_media_change - check if a removable media has been changed
 * @bdev: block device to check
 *
 * Check whether a removable media has been changed, and attempt to free all
 * dentries and inodes and invalidates all block device page cache entries in
 * that case.
 *
 * Returns %true if the block device changed, or %false if not.
 */
bool bdev_check_media_change(struct block_device *bdev)
{
	unsigned int events;

	events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return false;

	if (__invalidate_device(bdev, true))
		pr_warn("VFS: busy inodes on changed media %s\n",
			bdev->bd_disk->disk_name);
	set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
	return true;
}
EXPORT_SYMBOL(bdev_check_media_change);
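
A sketch of how a removable-media driver might consume this helper from its open path; my_open and my_revalidate are invented names and real drivers differ in the details:

static int my_open(struct block_device *bdev, fmode_t mode)
{
	if (bdev_check_media_change(bdev)) {
		/* page cache was already invalidated; refresh capacity/geometry */
		my_revalidate(bdev->bd_disk);
	}
	return 0;
}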
|
||||
|
||||
/*
|
||||
* Separate this part out so that a different pointer for clearing_ptr can be
|
||||
* passed in for disk_clear_events.
|
||||
*/
|
||||
static void disk_events_workfn(struct work_struct *work)
|
||||
{
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
|
||||
|
||||
disk_check_events(ev, &ev->clearing);
|
||||
}
|
||||
|
||||
/*
|
||||
* A disk events enabled device has the following sysfs nodes under
|
||||
* its /sys/block/X/ directory.
|
||||
*
|
||||
* events : list of all supported events
|
||||
* events_async : list of events which can be detected w/o polling
|
||||
* (always empty, only for backwards compatibility)
|
||||
* events_poll_msecs : polling interval, 0: disable, -1: system default
|
||||
*/
|
||||
static ssize_t __disk_events_show(unsigned int events, char *buf)
|
||||
{
|
||||
const char *delim = "";
|
||||
ssize_t pos = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
|
||||
if (events & (1 << i)) {
|
||||
pos += sprintf(buf + pos, "%s%s",
|
||||
delim, disk_events_strs[i]);
|
||||
delim = " ";
|
||||
}
|
||||
if (pos)
|
||||
pos += sprintf(buf + pos, "\n");
|
||||
return pos;
|
||||
}
|
||||
|
||||
static ssize_t disk_events_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
|
||||
		return 0;

	return __disk_events_show(disk->events, buf);
}

static ssize_t disk_events_async_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return 0;
}

static ssize_t disk_events_poll_msecs_show(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->ev)
		return sprintf(buf, "-1\n");
	return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
}

static ssize_t disk_events_poll_msecs_store(struct device *dev,
		struct device_attribute *attr,
		const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	long intv;

	if (!count || !sscanf(buf, "%ld", &intv))
		return -EINVAL;

	if (intv < 0 && intv != -1)
		return -EINVAL;

	if (!disk->ev)
		return -ENODEV;

	disk_block_events(disk);
	disk->ev->poll_msecs = intv;
	__disk_unblock_events(disk, true);
	return count;
}

DEVICE_ATTR(events, 0444, disk_events_show, NULL);
DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
DEVICE_ATTR(events_poll_msecs, 0644, disk_events_poll_msecs_show,
	    disk_events_poll_msecs_store);

/*
 * The default polling interval can be specified by the kernel
 * parameter block.events_dfl_poll_msecs which defaults to 0
 * (disable).  This can also be modified runtime by writing to
 * /sys/module/block/parameters/events_dfl_poll_msecs.
 */
static int disk_events_set_dfl_poll_msecs(const char *val,
					  const struct kernel_param *kp)
{
	struct disk_events *ev;
	int ret;

	ret = param_set_ulong(val, kp);
	if (ret < 0)
		return ret;

	mutex_lock(&disk_events_mutex);
	list_for_each_entry(ev, &disk_events, node)
		disk_flush_events(ev->disk, 0);
	mutex_unlock(&disk_events_mutex);
	return 0;
}

static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
	.set	= disk_events_set_dfl_poll_msecs,
	.get	= param_get_ulong,
};

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX	"block."

module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
		&disk_events_dfl_poll_msecs, 0644);

/*
 * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
 */
void disk_alloc_events(struct gendisk *disk)
{
	struct disk_events *ev;

	if (!disk->fops->check_events || !disk->events)
		return;

	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
	if (!ev) {
		pr_warn("%s: failed to initialize events\n", disk->disk_name);
		return;
	}

	INIT_LIST_HEAD(&ev->node);
	ev->disk = disk;
	spin_lock_init(&ev->lock);
	mutex_init(&ev->block_mutex);
	ev->block = 1;
	ev->poll_msecs = -1;
	INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);

	disk->ev = ev;
}

void disk_add_events(struct gendisk *disk)
{
	if (!disk->ev)
		return;

	mutex_lock(&disk_events_mutex);
	list_add_tail(&disk->ev->node, &disk_events);
	mutex_unlock(&disk_events_mutex);

	/*
	 * Block count is initialized to 1 and the following initial
	 * unblock kicks it into action.
	 */
	__disk_unblock_events(disk, true);
}

void disk_del_events(struct gendisk *disk)
{
	if (disk->ev) {
		disk_block_events(disk);

		mutex_lock(&disk_events_mutex);
		list_del_init(&disk->ev->node);
		mutex_unlock(&disk_events_mutex);
	}
}

void disk_release_events(struct gendisk *disk)
{
	/* the block count should be 1 from disk_del_events() */
	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
	kfree(disk->ev);
}
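/*
 * Not part of the patch: a minimal sketch of how a removable-media driver
 * is expected to hook into the events code above.  The "my_*" names are
 * hypothetical; the fields and flags (disk->events, disk->event_flags,
 * DISK_EVENT_*) are the real ones consumed by disk_alloc_events() and
 * disk_check_events().
 */
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>

static bool my_media_changed(struct gendisk *disk)
{
	return false;			/* query the hardware here */
}

static unsigned int my_check_events(struct gendisk *disk, unsigned int clearing)
{
	/* 'clearing' tells the driver which pending events will be consumed */
	return my_media_changed(disk) ? DISK_EVENT_MEDIA_CHANGE : 0;
}

static const struct block_device_operations my_fops = {
	.owner		= THIS_MODULE,
	.check_events	= my_check_events,
};

static void my_setup_events(struct gendisk *disk)
{
	disk->fops = &my_fops;
	disk->events = DISK_EVENT_MEDIA_CHANGE;	/* events we can report */
	/* poll for them and forward them to user space as uevents */
	disk->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT;
}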
|
@@ -350,9 +350,11 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req,
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 * Returns true if we merged, false otherwise. 'free' will contain all
 * requests that need to be freed.
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
			      struct list_head *free)
{
	struct request *__rq;
	bool ret;
@@ -363,8 +365,10 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
		list_add(&rq->queuelist, free);
		return true;
	}

	if (blk_queue_noxmerges(q))
		return false;
@@ -378,6 +382,7 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		list_add(&rq->queuelist, free);
		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
@@ -522,6 +527,10 @@ void elv_unregister_queue(struct request_queue *q)

int elv_register(struct elevator_type *e)
{
	/* insert_requests and dispatch_request are mandatory */
	if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
		return -EINVAL;

	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
@@ -693,7 +702,7 @@ void elevator_init_mq(struct request_queue *q)
		elevator_put(e);
	}
}

EXPORT_SYMBOL_GPL(elevator_init_mq); /* only for dm-rq */

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
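/*
 * Not part of the diff: a sketch of the caller side after this change.
 * Requests eliminated by merging are collected on a local list and only
 * freed once the scheduler lock has been dropped, which is what the new
 * 'free' argument exists for.  example_insert() is hypothetical;
 * blk_mq_free_requests() is the helper added elsewhere in this series.
 */
static void example_insert(struct request_queue *q, struct request *rq,
			   spinlock_t *sched_lock)
{
	LIST_HEAD(free);
	bool merged;

	spin_lock(sched_lock);
	merged = elv_attempt_insert_merge(q, rq, &free);
	/* ... queue rq to the scheduler here if !merged ... */
	spin_unlock(sched_lock);

	/* freeing may take other locks, so do it outside sched_lock */
	blk_mq_free_requests(&free);
}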
block/genhd.c | 703
@@ -33,13 +33,6 @@ static struct kobject *block_depr;
#define NR_EXT_DEVT		(1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);

static void disk_check_events(struct disk_events *ev,
			      unsigned int *clearing_ptr);
static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);

void set_capacity(struct gendisk *disk, sector_t sectors)
{
	struct block_device *bdev = disk->part0;
@@ -333,52 +326,22 @@ static int blk_mangle_minor(int minor)
	return minor;
}

/**
 * blk_alloc_devt - allocate a dev_t for a block device
 * @bdev: block device to allocate dev_t for
 * @devt: out parameter for resulting dev_t
 *
 * Allocate a dev_t for block device.
 *
 * RETURNS:
 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 * failure.
 *
 * CONTEXT:
 * Might sleep.
 */
int blk_alloc_devt(struct block_device *bdev, dev_t *devt)
int blk_alloc_ext_minor(void)
{
	struct gendisk *disk = bdev->bd_disk;
	int idx;

	/* in consecutive minor range? */
	if (bdev->bd_partno < disk->minors) {
		*devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
		return 0;
	}

	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
	if (idx < 0)
		return idx == -ENOSPC ? -EBUSY : idx;

	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
	return 0;
	if (idx < 0) {
		if (idx == -ENOSPC)
			return -EBUSY;
		return idx;
	}
	return blk_mangle_minor(idx);
}

/**
 * blk_free_devt - free a dev_t
 * @devt: dev_t to free
 *
 * Free @devt which was allocated using blk_alloc_devt().
 *
 * CONTEXT:
 * Might sleep.
 */
void blk_free_devt(dev_t devt)
void blk_free_ext_minor(unsigned int minor)
{
	if (MAJOR(devt) == BLOCK_EXT_MAJOR)
		ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
	ida_free(&ext_devt_ida, blk_mangle_minor(minor));
}

static char *bdevt_str(dev_t devt, char *buf)
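/*
 * Not part of the diff: roughly how the new helpers are consumed (this
 * mirrors the __device_add_disk() hunk further down).  A negative return
 * value is an error; a non-negative return is the allocated minor.
 */
static int example_assign_ext_devt(struct gendisk *disk)
{
	int minor = blk_alloc_ext_minor();

	if (minor < 0)
		return minor;
	disk->major = BLOCK_EXT_MAJOR;
	disk->first_minor = minor;
	return 0;
}

static void example_release_ext_devt(struct gendisk *disk)
{
	if (disk->major == BLOCK_EXT_MAJOR)
		blk_free_ext_minor(disk->first_minor);
}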
@ -499,8 +462,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups,
|
||||
bool register_queue)
|
||||
{
|
||||
dev_t devt;
|
||||
int retval;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* The disk queue should now be all set with enough information about
|
||||
@ -511,24 +473,36 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
if (register_queue)
|
||||
elevator_init_mq(disk->queue);
|
||||
|
||||
/* minors == 0 indicates to use ext devt from part0 and should
|
||||
* be accompanied with EXT_DEVT flag. Make sure all
|
||||
* parameters make sense.
|
||||
/*
|
||||
* If the driver provides an explicit major number it also must provide
|
||||
* the number of minors numbers supported, and those will be used to
|
||||
* setup the gendisk.
|
||||
* Otherwise just allocate the device numbers for both the whole device
|
||||
* and all partitions from the extended dev_t space.
|
||||
*/
|
||||
WARN_ON(disk->minors && !(disk->major || disk->first_minor));
|
||||
WARN_ON(!disk->minors &&
|
||||
!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
|
||||
if (disk->major) {
|
||||
WARN_ON(!disk->minors);
|
||||
|
||||
if (disk->minors > DISK_MAX_PARTS) {
|
||||
pr_err("block: can't allocate more than %d partitions\n",
|
||||
DISK_MAX_PARTS);
|
||||
disk->minors = DISK_MAX_PARTS;
|
||||
}
|
||||
} else {
|
||||
WARN_ON(disk->minors);
|
||||
|
||||
ret = blk_alloc_ext_minor();
|
||||
if (ret < 0) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
disk->major = BLOCK_EXT_MAJOR;
|
||||
disk->first_minor = MINOR(ret);
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
}
|
||||
|
||||
disk->flags |= GENHD_FL_UP;
|
||||
|
||||
retval = blk_alloc_devt(disk->part0, &devt);
|
||||
if (retval) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
disk->major = MAJOR(devt);
|
||||
disk->first_minor = MINOR(devt);
|
||||
|
||||
disk_alloc_events(disk);
|
||||
|
||||
if (disk->flags & GENHD_FL_HIDDEN) {
|
||||
@ -541,14 +515,14 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
} else {
|
||||
struct backing_dev_info *bdi = disk->queue->backing_dev_info;
|
||||
struct device *dev = disk_to_dev(disk);
|
||||
int ret;
|
||||
|
||||
/* Register BDI before referencing it from bdev */
|
||||
dev->devt = devt;
|
||||
ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
|
||||
dev->devt = MKDEV(disk->major, disk->first_minor);
|
||||
ret = bdi_register(bdi, "%u:%u",
|
||||
disk->major, disk->first_minor);
|
||||
WARN_ON(ret);
|
||||
bdi_set_owner(bdi, dev);
|
||||
bdev_add(disk->part0, devt);
|
||||
bdev_add(disk->part0, dev->devt);
|
||||
}
|
||||
register_disk(parent, disk, groups);
|
||||
if (register_queue)
|
||||
@ -558,7 +532,10 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
* Take an extra ref on queue which will be put on disk_release()
|
||||
* so that it sticks around as long as @disk is there.
|
||||
*/
|
||||
WARN_ON_ONCE(!blk_get_queue(disk->queue));
|
||||
if (blk_get_queue(disk->queue))
|
||||
set_bit(GD_QUEUE_REF, &disk->state);
|
||||
else
|
||||
WARN_ON_ONCE(1);
|
||||
|
||||
disk_add_events(disk);
|
||||
blk_integrity_add(disk);
|
||||
@ -607,10 +584,10 @@ void del_gendisk(struct gendisk *disk)
|
||||
blk_integrity_del(disk);
|
||||
disk_del_events(disk);
|
||||
|
||||
mutex_lock(&disk->part0->bd_mutex);
|
||||
mutex_lock(&disk->open_mutex);
|
||||
disk->flags &= ~GENHD_FL_UP;
|
||||
blk_drop_partitions(disk);
|
||||
mutex_unlock(&disk->part0->bd_mutex);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
|
||||
fsync_bdev(disk->part0);
|
||||
__invalidate_device(disk->part0, true);
|
||||
@ -692,32 +669,6 @@ void blk_request_module(dev_t devt)
|
||||
request_module("block-major-%d", MAJOR(devt));
|
||||
}
|
||||
|
||||
/**
|
||||
* bdget_disk - do bdget() by gendisk and partition number
|
||||
* @disk: gendisk of interest
|
||||
* @partno: partition number
|
||||
*
|
||||
* Find partition @partno from @disk, do bdget() on it.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Don't care.
|
||||
*
|
||||
* RETURNS:
|
||||
* Resulting block_device on success, NULL on failure.
|
||||
*/
|
||||
struct block_device *bdget_disk(struct gendisk *disk, int partno)
|
||||
{
|
||||
struct block_device *bdev = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
bdev = xa_load(&disk->part_tbl, partno);
|
||||
if (bdev && !bdgrab(bdev))
|
||||
bdev = NULL;
|
||||
rcu_read_unlock();
|
||||
|
||||
return bdev;
|
||||
}
|
||||
|
||||
/*
|
||||
* print a full list of all partitions - intended for places where the root
|
||||
* filesystem can't be mounted and thus to give the victim some idea of what
|
||||
@ -1071,6 +1022,9 @@ static struct attribute *disk_attrs[] = {
|
||||
&dev_attr_stat.attr,
|
||||
&dev_attr_inflight.attr,
|
||||
&dev_attr_badblocks.attr,
|
||||
&dev_attr_events.attr,
|
||||
&dev_attr_events_async.attr,
|
||||
&dev_attr_events_poll_msecs.attr,
|
||||
#ifdef CONFIG_FAIL_MAKE_REQUEST
|
||||
&dev_attr_fail.attr,
|
||||
#endif
|
||||
@ -1120,12 +1074,13 @@ static void disk_release(struct device *dev)
|
||||
|
||||
might_sleep();
|
||||
|
||||
blk_free_devt(dev->devt);
|
||||
if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
|
||||
blk_free_ext_minor(MINOR(dev->devt));
|
||||
disk_release_events(disk);
|
||||
kfree(disk->random);
|
||||
xa_destroy(&disk->part_tbl);
|
||||
bdput(disk->part0);
|
||||
if (disk->queue)
|
||||
if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
|
||||
blk_put_queue(disk->queue);
|
||||
kfree(disk);
|
||||
}
|
||||
@ -1242,6 +1197,20 @@ static int __init proc_genhd_init(void)
|
||||
module_init(proc_genhd_init);
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
dev_t part_devt(struct gendisk *disk, u8 partno)
|
||||
{
|
||||
struct block_device *part;
|
||||
dev_t devt = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
part = xa_load(&disk->part_tbl, partno);
|
||||
if (part)
|
||||
devt = part->bd_dev;
|
||||
rcu_read_unlock();
|
||||
|
||||
return devt;
|
||||
}
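/*
 * Not part of the diff: example use of the new part_devt() helper.  It can
 * be called without holding open_mutex (it only takes rcu_read_lock()) and
 * returns 0 when the partition does not exist.
 */
static void example_print_part(struct gendisk *disk, u8 partno)
{
	dev_t devt = part_devt(disk, partno);

	if (devt)
		pr_info("%s: partition %u is %u:%u\n", disk->disk_name,
			partno, MAJOR(devt), MINOR(devt));
}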
|
||||
|
||||
dev_t blk_lookup_devt(const char *name, int partno)
|
||||
{
|
||||
dev_t devt = MKDEV(0, 0);
|
||||
@ -1251,7 +1220,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
|
||||
class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
|
||||
while ((dev = class_dev_iter_next(&iter))) {
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
struct block_device *part;
|
||||
|
||||
if (strcmp(dev_name(dev), name))
|
||||
continue;
|
||||
@ -1262,13 +1230,10 @@ dev_t blk_lookup_devt(const char *name, int partno)
|
||||
*/
|
||||
devt = MKDEV(MAJOR(dev->devt),
|
||||
MINOR(dev->devt) + partno);
|
||||
break;
|
||||
}
|
||||
part = bdget_disk(disk, partno);
|
||||
if (part) {
|
||||
devt = part->bd_dev;
|
||||
bdput(part);
|
||||
break;
|
||||
} else {
|
||||
devt = part_devt(disk, partno);
|
||||
if (devt)
|
||||
break;
|
||||
}
|
||||
}
|
||||
class_dev_iter_exit(&iter);
|
||||
@ -1279,13 +1244,6 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
|
||||
if (minors > DISK_MAX_PARTS) {
|
||||
printk(KERN_ERR
|
||||
"block: can't allocate more than %d partitions\n",
|
||||
DISK_MAX_PARTS);
|
||||
minors = DISK_MAX_PARTS;
|
||||
}
|
||||
|
||||
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
|
||||
if (!disk)
|
||||
return NULL;
|
||||
@ -1295,6 +1253,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||
goto out_free_disk;
|
||||
|
||||
disk->node_id = node_id;
|
||||
mutex_init(&disk->open_mutex);
|
||||
xa_init(&disk->part_tbl);
|
||||
if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
|
||||
goto out_destroy_part_tbl;
|
||||
@ -1315,6 +1274,25 @@ out_free_disk:
|
||||
}
|
||||
EXPORT_SYMBOL(__alloc_disk_node);
|
||||
|
||||
struct gendisk *__blk_alloc_disk(int node)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
|
||||
q = blk_alloc_queue(node);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
disk = __alloc_disk_node(0, node);
|
||||
if (!disk) {
|
||||
blk_cleanup_queue(q);
|
||||
return NULL;
|
||||
}
|
||||
disk->queue = q;
|
||||
return disk;
|
||||
}
|
||||
EXPORT_SYMBOL(__blk_alloc_disk);
|
||||
|
||||
/**
|
||||
* put_disk - decrements the gendisk refcount
|
||||
* @disk: the struct gendisk to decrement the refcount for
|
||||
@ -1332,6 +1310,22 @@ void put_disk(struct gendisk *disk)
|
||||
}
|
||||
EXPORT_SYMBOL(put_disk);
|
||||
|
||||
/**
|
||||
* blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
|
||||
* @disk: gendisk to shutdown
|
||||
*
|
||||
* Mark the queue hanging off @disk DYING, drain all pending requests, then mark
|
||||
* the queue DEAD, destroy and put it and the gendisk structure.
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
void blk_cleanup_disk(struct gendisk *disk)
|
||||
{
|
||||
blk_cleanup_queue(disk->queue);
|
||||
put_disk(disk);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_cleanup_disk);
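/*
 * Not part of the diff: the intended usage pattern for bio-based drivers
 * after this series.  blk_alloc_disk() is the wrapper around
 * __blk_alloc_disk() declared in genhd.h; the "my_*" names and the
 * my_fops block_device_operations (with a ->submit_bio handler, not shown)
 * are hypothetical.
 */
static struct gendisk *my_disk;

static int my_probe(void)
{
	my_disk = blk_alloc_disk(NUMA_NO_NODE);
	if (!my_disk)
		return -ENOMEM;

	/* no explicit major: device_add_disk() will use the extended dev_t */
	my_disk->fops = &my_fops;
	snprintf(my_disk->disk_name, sizeof(my_disk->disk_name), "myblk0");
	blk_queue_logical_block_size(my_disk->queue, 512);
	set_capacity(my_disk, 1024);		/* in 512-byte sectors */
	add_disk(my_disk);
	return 0;
}

static void my_remove(void)
{
	del_gendisk(my_disk);
	/* tears down the queue allocated by blk_alloc_disk() and puts the disk */
	blk_cleanup_disk(my_disk);
}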
|
||||
|
||||
static void set_disk_ro_uevent(struct gendisk *gd, int ro)
|
||||
{
|
||||
char event[] = "DISK_RO=1";
|
||||
@ -1369,488 +1363,3 @@ int bdev_read_only(struct block_device *bdev)
|
||||
return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
|
||||
}
|
||||
EXPORT_SYMBOL(bdev_read_only);
|
||||
|
||||
/*
|
||||
* Disk events - monitor disk events like media change and eject request.
|
||||
*/
|
||||
struct disk_events {
|
||||
struct list_head node; /* all disk_event's */
|
||||
struct gendisk *disk; /* the associated disk */
|
||||
spinlock_t lock;
|
||||
|
||||
struct mutex block_mutex; /* protects blocking */
|
||||
int block; /* event blocking depth */
|
||||
unsigned int pending; /* events already sent out */
|
||||
unsigned int clearing; /* events being cleared */
|
||||
|
||||
long poll_msecs; /* interval, -1 for default */
|
||||
struct delayed_work dwork;
|
||||
};
|
||||
|
||||
static const char *disk_events_strs[] = {
|
||||
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
|
||||
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
|
||||
};
|
||||
|
||||
static char *disk_uevents[] = {
|
||||
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
|
||||
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
|
||||
};
|
||||
|
||||
/* list of all disk_events */
|
||||
static DEFINE_MUTEX(disk_events_mutex);
|
||||
static LIST_HEAD(disk_events);
|
||||
|
||||
/* disable in-kernel polling by default */
|
||||
static unsigned long disk_events_dfl_poll_msecs;
|
||||
|
||||
static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
long intv_msecs = 0;
|
||||
|
||||
/*
|
||||
* If device-specific poll interval is set, always use it. If
|
||||
* the default is being used, poll if the POLL flag is set.
|
||||
*/
|
||||
if (ev->poll_msecs >= 0)
|
||||
intv_msecs = ev->poll_msecs;
|
||||
else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
|
||||
intv_msecs = disk_events_dfl_poll_msecs;
|
||||
|
||||
return msecs_to_jiffies(intv_msecs);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_block_events - block and flush disk event checking
|
||||
* @disk: disk to block events for
|
||||
*
|
||||
* On return from this function, it is guaranteed that event checking
|
||||
* isn't in progress and won't happen until unblocked by
|
||||
* disk_unblock_events(). Events blocking is counted and the actual
|
||||
* unblocking happens after the matching number of unblocks are done.
|
||||
*
|
||||
* Note that this intentionally does not block event checking from
|
||||
* disk_clear_events().
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep.
|
||||
*/
|
||||
void disk_block_events(struct gendisk *disk)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
unsigned long flags;
|
||||
bool cancel;
|
||||
|
||||
if (!ev)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Outer mutex ensures that the first blocker completes canceling
|
||||
* the event work before further blockers are allowed to finish.
|
||||
*/
|
||||
mutex_lock(&ev->block_mutex);
|
||||
|
||||
spin_lock_irqsave(&ev->lock, flags);
|
||||
cancel = !ev->block++;
|
||||
spin_unlock_irqrestore(&ev->lock, flags);
|
||||
|
||||
if (cancel)
|
||||
cancel_delayed_work_sync(&disk->ev->dwork);
|
||||
|
||||
mutex_unlock(&ev->block_mutex);
|
||||
}
|
||||
|
||||
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
unsigned long intv;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&ev->lock, flags);
|
||||
|
||||
if (WARN_ON_ONCE(ev->block <= 0))
|
||||
goto out_unlock;
|
||||
|
||||
if (--ev->block)
|
||||
goto out_unlock;
|
||||
|
||||
intv = disk_events_poll_jiffies(disk);
|
||||
if (check_now)
|
||||
queue_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, 0);
|
||||
else if (intv)
|
||||
queue_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, intv);
|
||||
out_unlock:
|
||||
spin_unlock_irqrestore(&ev->lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_unblock_events - unblock disk event checking
|
||||
* @disk: disk to unblock events for
|
||||
*
|
||||
* Undo disk_block_events(). When the block count reaches zero, it
|
||||
* starts events polling if configured.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Don't care. Safe to call from irq context.
|
||||
*/
|
||||
void disk_unblock_events(struct gendisk *disk)
|
||||
{
|
||||
if (disk->ev)
|
||||
__disk_unblock_events(disk, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_flush_events - schedule immediate event checking and flushing
|
||||
* @disk: disk to check and flush events for
|
||||
* @mask: events to flush
|
||||
*
|
||||
* Schedule immediate event checking on @disk if not blocked. Events in
|
||||
* @mask are scheduled to be cleared from the driver. Note that this
|
||||
* doesn't clear the events from @disk->ev.
|
||||
*
|
||||
* CONTEXT:
|
||||
* If @mask is non-zero must be called with bdev->bd_mutex held.
|
||||
*/
|
||||
void disk_flush_events(struct gendisk *disk, unsigned int mask)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
|
||||
if (!ev)
|
||||
return;
|
||||
|
||||
spin_lock_irq(&ev->lock);
|
||||
ev->clearing |= mask;
|
||||
if (!ev->block)
|
||||
mod_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, 0);
|
||||
spin_unlock_irq(&ev->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_clear_events - synchronously check, clear and return pending events
|
||||
* @disk: disk to fetch and clear events from
|
||||
* @mask: mask of events to be fetched and cleared
|
||||
*
|
||||
* Disk events are synchronously checked and pending events in @mask
|
||||
* are cleared and returned. This ignores the block count.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep.
|
||||
*/
|
||||
static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
|
||||
{
|
||||
struct disk_events *ev = disk->ev;
|
||||
unsigned int pending;
|
||||
unsigned int clearing = mask;
|
||||
|
||||
if (!ev)
|
||||
return 0;
|
||||
|
||||
disk_block_events(disk);
|
||||
|
||||
/*
|
||||
* store the union of mask and ev->clearing on the stack so that the
|
||||
* race with disk_flush_events does not cause ambiguity (ev->clearing
|
||||
* can still be modified even if events are blocked).
|
||||
*/
|
||||
spin_lock_irq(&ev->lock);
|
||||
clearing |= ev->clearing;
|
||||
ev->clearing = 0;
|
||||
spin_unlock_irq(&ev->lock);
|
||||
|
||||
disk_check_events(ev, &clearing);
|
||||
/*
|
||||
* if ev->clearing is not 0, the disk_flush_events got called in the
|
||||
* middle of this function, so we want to run the workfn without delay.
|
||||
*/
|
||||
__disk_unblock_events(disk, ev->clearing ? true : false);
|
||||
|
||||
/* then, fetch and clear pending events */
|
||||
spin_lock_irq(&ev->lock);
|
||||
pending = ev->pending & mask;
|
||||
ev->pending &= ~mask;
|
||||
spin_unlock_irq(&ev->lock);
|
||||
WARN_ON_ONCE(clearing & mask);
|
||||
|
||||
return pending;
|
||||
}
|
||||
|
||||
/**
|
||||
* bdev_check_media_change - check if a removable media has been changed
|
||||
* @bdev: block device to check
|
||||
*
|
||||
* Check whether a removable media has been changed, and attempt to free all
|
||||
* dentries and inodes and invalidates all block device page cache entries in
|
||||
* that case.
|
||||
*
|
||||
* Returns %true if the block device changed, or %false if not.
|
||||
*/
|
||||
bool bdev_check_media_change(struct block_device *bdev)
|
||||
{
|
||||
unsigned int events;
|
||||
|
||||
events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
|
||||
DISK_EVENT_EJECT_REQUEST);
|
||||
if (!(events & DISK_EVENT_MEDIA_CHANGE))
|
||||
return false;
|
||||
|
||||
if (__invalidate_device(bdev, true))
|
||||
pr_warn("VFS: busy inodes on changed media %s\n",
|
||||
bdev->bd_disk->disk_name);
|
||||
set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(bdev_check_media_change);
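/*
 * Not part of the diff: typical use of bdev_check_media_change() from a
 * driver's ->open() method ("my_*" pieces are hypothetical).  The helper
 * clears the pending MEDIA_CHANGE event, invalidates the bdev page cache
 * and flags the disk for a partition rescan; the driver only has to
 * revalidate the medium itself.
 */
static sector_t my_read_capacity(struct gendisk *disk)
{
	return 0;	/* query the hardware here */
}

static int my_open(struct block_device *bdev, fmode_t mode)
{
	if (bdev_check_media_change(bdev))
		set_capacity(bdev->bd_disk, my_read_capacity(bdev->bd_disk));
	return 0;
}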
|
||||
|
||||
/*
|
||||
* Separate this part out so that a different pointer for clearing_ptr can be
|
||||
* passed in for disk_clear_events.
|
||||
*/
|
||||
static void disk_events_workfn(struct work_struct *work)
|
||||
{
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
|
||||
|
||||
disk_check_events(ev, &ev->clearing);
|
||||
}
|
||||
|
||||
static void disk_check_events(struct disk_events *ev,
|
||||
unsigned int *clearing_ptr)
|
||||
{
|
||||
struct gendisk *disk = ev->disk;
|
||||
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
|
||||
unsigned int clearing = *clearing_ptr;
|
||||
unsigned int events;
|
||||
unsigned long intv;
|
||||
int nr_events = 0, i;
|
||||
|
||||
/* check events */
|
||||
events = disk->fops->check_events(disk, clearing);
|
||||
|
||||
/* accumulate pending events and schedule next poll if necessary */
|
||||
spin_lock_irq(&ev->lock);
|
||||
|
||||
events &= ~ev->pending;
|
||||
ev->pending |= events;
|
||||
*clearing_ptr &= ~clearing;
|
||||
|
||||
intv = disk_events_poll_jiffies(disk);
|
||||
if (!ev->block && intv)
|
||||
queue_delayed_work(system_freezable_power_efficient_wq,
|
||||
&ev->dwork, intv);
|
||||
|
||||
spin_unlock_irq(&ev->lock);
|
||||
|
||||
/*
|
||||
* Tell userland about new events. Only the events listed in
|
||||
* @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
|
||||
* is set. Otherwise, events are processed internally but never
|
||||
* get reported to userland.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
|
||||
if ((events & disk->events & (1 << i)) &&
|
||||
(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
|
||||
envp[nr_events++] = disk_uevents[i];
|
||||
|
||||
if (nr_events)
|
||||
kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
|
||||
}
|
||||
|
||||
/*
|
||||
* A disk events enabled device has the following sysfs nodes under
|
||||
* its /sys/block/X/ directory.
|
||||
*
|
||||
* events : list of all supported events
|
||||
* events_async : list of events which can be detected w/o polling
|
||||
* (always empty, only for backwards compatibility)
|
||||
* events_poll_msecs : polling interval, 0: disable, -1: system default
|
||||
*/
|
||||
static ssize_t __disk_events_show(unsigned int events, char *buf)
|
||||
{
|
||||
const char *delim = "";
|
||||
ssize_t pos = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
|
||||
if (events & (1 << i)) {
|
||||
pos += sprintf(buf + pos, "%s%s",
|
||||
delim, disk_events_strs[i]);
|
||||
delim = " ";
|
||||
}
|
||||
if (pos)
|
||||
pos += sprintf(buf + pos, "\n");
|
||||
return pos;
|
||||
}
|
||||
|
||||
static ssize_t disk_events_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
|
||||
return 0;
|
||||
|
||||
return __disk_events_show(disk->events, buf);
|
||||
}
|
||||
|
||||
static ssize_t disk_events_async_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t disk_events_poll_msecs_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
|
||||
if (!disk->ev)
|
||||
return sprintf(buf, "-1\n");
|
||||
|
||||
return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
|
||||
}
|
||||
|
||||
static ssize_t disk_events_poll_msecs_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
long intv;
|
||||
|
||||
if (!count || !sscanf(buf, "%ld", &intv))
|
||||
return -EINVAL;
|
||||
|
||||
if (intv < 0 && intv != -1)
|
||||
return -EINVAL;
|
||||
|
||||
if (!disk->ev)
|
||||
return -ENODEV;
|
||||
|
||||
disk_block_events(disk);
|
||||
disk->ev->poll_msecs = intv;
|
||||
__disk_unblock_events(disk, true);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
|
||||
static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
|
||||
static const DEVICE_ATTR(events_poll_msecs, 0644,
|
||||
disk_events_poll_msecs_show,
|
||||
disk_events_poll_msecs_store);
|
||||
|
||||
static const struct attribute *disk_events_attrs[] = {
|
||||
&dev_attr_events.attr,
|
||||
&dev_attr_events_async.attr,
|
||||
&dev_attr_events_poll_msecs.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
* The default polling interval can be specified by the kernel
|
||||
* parameter block.events_dfl_poll_msecs which defaults to 0
|
||||
* (disable). This can also be modified runtime by writing to
|
||||
* /sys/module/block/parameters/events_dfl_poll_msecs.
|
||||
*/
|
||||
static int disk_events_set_dfl_poll_msecs(const char *val,
|
||||
const struct kernel_param *kp)
|
||||
{
|
||||
struct disk_events *ev;
|
||||
int ret;
|
||||
|
||||
ret = param_set_ulong(val, kp);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&disk_events_mutex);
|
||||
|
||||
list_for_each_entry(ev, &disk_events, node)
|
||||
disk_flush_events(ev->disk, 0);
|
||||
|
||||
mutex_unlock(&disk_events_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
|
||||
.set = disk_events_set_dfl_poll_msecs,
|
||||
.get = param_get_ulong,
|
||||
};
|
||||
|
||||
#undef MODULE_PARAM_PREFIX
|
||||
#define MODULE_PARAM_PREFIX "block."
|
||||
|
||||
module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
|
||||
&disk_events_dfl_poll_msecs, 0644);
|
||||
|
||||
/*
|
||||
* disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
|
||||
*/
|
||||
static void disk_alloc_events(struct gendisk *disk)
|
||||
{
|
||||
struct disk_events *ev;
|
||||
|
||||
if (!disk->fops->check_events || !disk->events)
|
||||
return;
|
||||
|
||||
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
|
||||
if (!ev) {
|
||||
pr_warn("%s: failed to initialize events\n", disk->disk_name);
|
||||
return;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&ev->node);
|
||||
ev->disk = disk;
|
||||
spin_lock_init(&ev->lock);
|
||||
mutex_init(&ev->block_mutex);
|
||||
ev->block = 1;
|
||||
ev->poll_msecs = -1;
|
||||
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
|
||||
|
||||
disk->ev = ev;
|
||||
}
|
||||
|
||||
static void disk_add_events(struct gendisk *disk)
|
||||
{
|
||||
/* FIXME: error handling */
|
||||
if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
|
||||
pr_warn("%s: failed to create sysfs files for events\n",
|
||||
disk->disk_name);
|
||||
|
||||
if (!disk->ev)
|
||||
return;
|
||||
|
||||
mutex_lock(&disk_events_mutex);
|
||||
list_add_tail(&disk->ev->node, &disk_events);
|
||||
mutex_unlock(&disk_events_mutex);
|
||||
|
||||
/*
|
||||
* Block count is initialized to 1 and the following initial
|
||||
* unblock kicks it into action.
|
||||
*/
|
||||
__disk_unblock_events(disk, true);
|
||||
}
|
||||
|
||||
static void disk_del_events(struct gendisk *disk)
|
||||
{
|
||||
if (disk->ev) {
|
||||
disk_block_events(disk);
|
||||
|
||||
mutex_lock(&disk_events_mutex);
|
||||
list_del_init(&disk->ev->node);
|
||||
mutex_unlock(&disk_events_mutex);
|
||||
}
|
||||
|
||||
sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
|
||||
}
|
||||
|
||||
static void disk_release_events(struct gendisk *disk)
|
||||
{
|
||||
/* the block count should be 1 from disk_del_events() */
|
||||
WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
|
||||
kfree(disk->ev);
|
||||
}
|
||||
|
@@ -89,7 +89,7 @@ static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
		return -EINVAL;
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (bdev->bd_part_count)
	if (bdev->bd_disk->open_partitions)
		return -EBUSY;

	/*
block/mq-deadline-cgroup.c | 126 (new file)
@@ -0,0 +1,126 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/blk-cgroup.h>
#include <linux/ioprio.h>

#include "mq-deadline-cgroup.h"

static struct blkcg_policy dd_blkcg_policy;

static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp)
{
	struct dd_blkcg *pd;

	pd = kzalloc(sizeof(*pd), gfp);
	if (!pd)
		return NULL;
	pd->stats = alloc_percpu_gfp(typeof(*pd->stats),
				     GFP_KERNEL | __GFP_ZERO);
	if (!pd->stats) {
		kfree(pd);
		return NULL;
	}
	return &pd->cpd;
}

static void dd_cpd_free(struct blkcg_policy_data *cpd)
{
	struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd);

	free_percpu(dd_blkcg->stats);
	kfree(dd_blkcg);
}

static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd)
{
	return container_of(blkcg_to_cpd(pd->blkg->blkcg, &dd_blkcg_policy),
			    struct dd_blkcg, cpd);
}

/*
 * Convert an association between a block cgroup and a request queue into a
 * pointer to the mq-deadline information associated with a (blkcg, queue) pair.
 */
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
{
	struct blkg_policy_data *pd;

	pd = blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy);
	if (!pd)
		return NULL;

	return dd_blkcg_from_pd(pd);
}

static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
{
	static const char *const prio_class_name[] = {
		[IOPRIO_CLASS_NONE]	= "NONE",
		[IOPRIO_CLASS_RT]	= "RT",
		[IOPRIO_CLASS_BE]	= "BE",
		[IOPRIO_CLASS_IDLE]	= "IDLE",
	};
	struct dd_blkcg *blkcg = dd_blkcg_from_pd(pd);
	int res = 0;
	u8 prio;

	for (prio = 0; prio < ARRAY_SIZE(blkcg->stats->stats); prio++)
		res += scnprintf(buf + res, size - res,
			" [%s] dispatched=%u inserted=%u merged=%u",
			prio_class_name[prio],
			ddcg_sum(blkcg, dispatched, prio) +
			ddcg_sum(blkcg, merged, prio) -
			ddcg_sum(blkcg, completed, prio),
			ddcg_sum(blkcg, inserted, prio) -
			ddcg_sum(blkcg, completed, prio),
			ddcg_sum(blkcg, merged, prio));

	return res;
}

static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q,
					    struct blkcg *blkcg)
{
	struct dd_blkg *pd;

	pd = kzalloc(sizeof(*pd), gfp);
	if (!pd)
		return NULL;
	return &pd->pd;
}

static void dd_pd_free(struct blkg_policy_data *pd)
{
	struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd);

	kfree(dd_blkg);
}

static struct blkcg_policy dd_blkcg_policy = {
	.cpd_alloc_fn		= dd_cpd_alloc,
	.cpd_free_fn		= dd_cpd_free,

	.pd_alloc_fn		= dd_pd_alloc,
	.pd_free_fn		= dd_pd_free,
	.pd_stat_fn		= dd_pd_stat,
};

int dd_activate_policy(struct request_queue *q)
{
	return blkcg_activate_policy(q, &dd_blkcg_policy);
}

void dd_deactivate_policy(struct request_queue *q)
{
	blkcg_deactivate_policy(q, &dd_blkcg_policy);
}

int __init dd_blkcg_init(void)
{
	return blkcg_policy_register(&dd_blkcg_policy);
}

void __exit dd_blkcg_exit(void)
{
	blkcg_policy_unregister(&dd_blkcg_policy);
}
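/*
 * Not part of the diff: how the scheduler core is expected to drive these
 * entry points.  The real consumer, block/mq-deadline-main.c, is suppressed
 * further down in this view; the example_* names here are hypothetical.
 */
static int __init example_module_init(void)
{
	/* once, at module load: register the blkcg policy */
	return dd_blkcg_init();
}

static int example_init_queue(struct request_queue *q)
{
	/* per request queue, from the elevator's init path */
	return dd_activate_policy(q);
}

static struct dd_blkcg *example_account_bio(struct bio *bio)
{
	/* map a bio to its cgroup's mq-deadline stats; may return NULL */
	return dd_blkcg_from_bio(bio);
}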
|
block/mq-deadline-cgroup.h | 114 (new file)
@@ -0,0 +1,114 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#if !defined(_MQ_DEADLINE_CGROUP_H_)
|
||||
#define _MQ_DEADLINE_CGROUP_H_
|
||||
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
struct request_queue;
|
||||
|
||||
/**
|
||||
* struct io_stats_per_prio - I/O statistics per I/O priority class.
|
||||
* @inserted: Number of inserted requests.
|
||||
* @merged: Number of merged requests.
|
||||
* @dispatched: Number of dispatched requests.
|
||||
* @completed: Number of I/O completions.
|
||||
*/
|
||||
struct io_stats_per_prio {
|
||||
local_t inserted;
|
||||
local_t merged;
|
||||
local_t dispatched;
|
||||
local_t completed;
|
||||
};
|
||||
|
||||
/* I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*). */
|
||||
struct blkcg_io_stats {
|
||||
struct io_stats_per_prio stats[4];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct dd_blkcg - Per cgroup data.
|
||||
* @cpd: blkcg_policy_data structure.
|
||||
* @stats: I/O statistics.
|
||||
*/
|
||||
struct dd_blkcg {
|
||||
struct blkcg_policy_data cpd; /* must be the first member */
|
||||
struct blkcg_io_stats __percpu *stats;
|
||||
};
|
||||
|
||||
/*
|
||||
* Count one event of type 'event_type' and with I/O priority class
|
||||
* 'prio_class'.
|
||||
*/
|
||||
#define ddcg_count(ddcg, event_type, prio_class) do { \
|
||||
if (ddcg) { \
|
||||
struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats); \
|
||||
\
|
||||
BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \
|
||||
BUILD_BUG_ON(!__same_type((prio_class), u8)); \
|
||||
local_inc(&io_stats->stats[(prio_class)].event_type); \
|
||||
put_cpu_ptr(io_stats); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls
|
||||
* across all CPUs. No locking or barriers since it is fine if the returned
|
||||
* sum is slightly outdated.
|
||||
*/
|
||||
#define ddcg_sum(ddcg, event_type, prio) ({ \
|
||||
unsigned int cpu; \
|
||||
u32 sum = 0; \
|
||||
\
|
||||
BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \
|
||||
BUILD_BUG_ON(!__same_type((prio), u8)); \
|
||||
for_each_present_cpu(cpu) \
|
||||
sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)-> \
|
||||
stats[(prio)].event_type); \
|
||||
sum; \
|
||||
})
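/*
 * Not part of the diff: usage sketch for the two macros above.  prio_class
 * must be a u8 (enforced by the BUILD_BUG_ON checks) and 'inserted' /
 * 'completed' name fields of struct io_stats_per_prio.
 */
static void example_note_insert(struct dd_blkcg *ddcg, u8 prio_class)
{
	/* cheap per-CPU increment; a NULL ddcg is tolerated by the macro */
	ddcg_count(ddcg, inserted, prio_class);
}

static u32 example_in_flight(struct dd_blkcg *ddcg, u8 prio_class)
{
	/* approximate: the per-CPU sums may be slightly stale */
	return ddcg_sum(ddcg, inserted, prio_class) -
	       ddcg_sum(ddcg, completed, prio_class);
}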
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
|
||||
/**
|
||||
* struct dd_blkg - Per (cgroup, request queue) data.
|
||||
* @pd: blkg_policy_data structure.
|
||||
*/
|
||||
struct dd_blkg {
|
||||
struct blkg_policy_data pd; /* must be the first member */
|
||||
};
|
||||
|
||||
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio);
|
||||
int dd_activate_policy(struct request_queue *q);
|
||||
void dd_deactivate_policy(struct request_queue *q);
|
||||
int __init dd_blkcg_init(void);
|
||||
void __exit dd_blkcg_exit(void);
|
||||
|
||||
#else /* CONFIG_BLK_CGROUP */
|
||||
|
||||
static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int dd_activate_policy(struct request_queue *q)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void dd_deactivate_policy(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int dd_blkcg_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void dd_blkcg_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
#endif /* _MQ_DEADLINE_CGROUP_H_ */
|
block/mq-deadline-main.c | 1175 (new file; diff suppressed because it is too large)
@@ -1,815 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
|
||||
* for the blk-mq scheduling framework
|
||||
*
|
||||
* Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/elevator.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/sbitmap.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-mq-sched.h"
|
||||
|
||||
/*
|
||||
* See Documentation/block/deadline-iosched.rst
|
||||
*/
|
||||
static const int read_expire = HZ / 2; /* max time before a read is submitted. */
|
||||
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
|
||||
static const int writes_starved = 2; /* max times reads can starve a write */
|
||||
static const int fifo_batch = 16; /* # of sequential requests treated as one
|
||||
by the above parameters. For throughput. */
|
||||
|
||||
struct deadline_data {
|
||||
/*
|
||||
* run time data
|
||||
*/
|
||||
|
||||
/*
|
||||
* requests (deadline_rq s) are present on both sort_list and fifo_list
|
||||
*/
|
||||
struct rb_root sort_list[2];
|
||||
struct list_head fifo_list[2];
|
||||
|
||||
/*
|
||||
* next in sort order. read, write or both are NULL
|
||||
*/
|
||||
struct request *next_rq[2];
|
||||
unsigned int batching; /* number of sequential requests made */
|
||||
unsigned int starved; /* times reads have starved writes */
|
||||
|
||||
/*
|
||||
* settings that change how the i/o scheduler behaves
|
||||
*/
|
||||
int fifo_expire[2];
|
||||
int fifo_batch;
|
||||
int writes_starved;
|
||||
int front_merges;
|
||||
|
||||
spinlock_t lock;
|
||||
spinlock_t zone_lock;
|
||||
struct list_head dispatch;
|
||||
};
|
||||
|
||||
static inline struct rb_root *
|
||||
deadline_rb_root(struct deadline_data *dd, struct request *rq)
|
||||
{
|
||||
return &dd->sort_list[rq_data_dir(rq)];
|
||||
}
|
||||
|
||||
/*
|
||||
* get the request after `rq' in sector-sorted order
|
||||
*/
|
||||
static inline struct request *
|
||||
deadline_latter_request(struct request *rq)
|
||||
{
|
||||
struct rb_node *node = rb_next(&rq->rb_node);
|
||||
|
||||
if (node)
|
||||
return rb_entry_rq(node);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
|
||||
{
|
||||
struct rb_root *root = deadline_rb_root(dd, rq);
|
||||
|
||||
elv_rb_add(root, rq);
|
||||
}
|
||||
|
||||
static inline void
|
||||
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
|
||||
{
|
||||
const int data_dir = rq_data_dir(rq);
|
||||
|
||||
if (dd->next_rq[data_dir] == rq)
|
||||
dd->next_rq[data_dir] = deadline_latter_request(rq);
|
||||
|
||||
elv_rb_del(deadline_rb_root(dd, rq), rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* remove rq from rbtree and fifo.
|
||||
*/
|
||||
static void deadline_remove_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
list_del_init(&rq->queuelist);
|
||||
|
||||
/*
|
||||
* We might not be on the rbtree, if we are doing an insert merge
|
||||
*/
|
||||
if (!RB_EMPTY_NODE(&rq->rb_node))
|
||||
deadline_del_rq_rb(dd, rq);
|
||||
|
||||
elv_rqhash_del(q, rq);
|
||||
if (q->last_merge == rq)
|
||||
q->last_merge = NULL;
|
||||
}
|
||||
|
||||
static void dd_request_merged(struct request_queue *q, struct request *req,
|
||||
enum elv_merge type)
|
||||
{
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
/*
|
||||
* if the merge was a front merge, we need to reposition request
|
||||
*/
|
||||
if (type == ELEVATOR_FRONT_MERGE) {
|
||||
elv_rb_del(deadline_rb_root(dd, req), req);
|
||||
deadline_add_rq_rb(dd, req);
|
||||
}
|
||||
}
|
||||
|
||||
static void dd_merged_requests(struct request_queue *q, struct request *req,
|
||||
struct request *next)
|
||||
{
|
||||
/*
|
||||
* if next expires before rq, assign its expire time to rq
|
||||
* and move into next position (next will be deleted) in fifo
|
||||
*/
|
||||
if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
|
||||
if (time_before((unsigned long)next->fifo_time,
|
||||
(unsigned long)req->fifo_time)) {
|
||||
list_move(&req->queuelist, &next->queuelist);
|
||||
req->fifo_time = next->fifo_time;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* kill knowledge of next, this one is a goner
|
||||
*/
|
||||
deadline_remove_request(q, next);
|
||||
}
|
||||
|
||||
/*
|
||||
* move an entry to dispatch queue
|
||||
*/
|
||||
static void
|
||||
deadline_move_request(struct deadline_data *dd, struct request *rq)
|
||||
{
|
||||
const int data_dir = rq_data_dir(rq);
|
||||
|
||||
dd->next_rq[READ] = NULL;
|
||||
dd->next_rq[WRITE] = NULL;
|
||||
dd->next_rq[data_dir] = deadline_latter_request(rq);
|
||||
|
||||
/*
|
||||
* take it off the sort and fifo list
|
||||
*/
|
||||
deadline_remove_request(rq->q, rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
|
||||
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
|
||||
*/
|
||||
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
|
||||
{
|
||||
struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
|
||||
|
||||
/*
|
||||
* rq is expired!
|
||||
*/
|
||||
if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* For the specified data direction, return the next request to
|
||||
* dispatch using arrival ordered lists.
|
||||
*/
|
||||
static struct request *
|
||||
deadline_fifo_request(struct deadline_data *dd, int data_dir)
|
||||
{
|
||||
struct request *rq;
|
||||
unsigned long flags;
|
||||
|
||||
if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
|
||||
return NULL;
|
||||
|
||||
if (list_empty(&dd->fifo_list[data_dir]))
|
||||
return NULL;
|
||||
|
||||
rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
|
||||
if (data_dir == READ || !blk_queue_is_zoned(rq->q))
|
||||
return rq;
|
||||
|
||||
/*
|
||||
* Look for a write request that can be dispatched, that is one with
|
||||
* an unlocked target zone.
|
||||
*/
|
||||
spin_lock_irqsave(&dd->zone_lock, flags);
|
||||
list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
|
||||
if (blk_req_can_dispatch_to_zone(rq))
|
||||
goto out;
|
||||
}
|
||||
rq = NULL;
|
||||
out:
|
||||
spin_unlock_irqrestore(&dd->zone_lock, flags);
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* For the specified data direction, return the next request to
|
||||
* dispatch using sector position sorted lists.
|
||||
*/
|
||||
static struct request *
|
||||
deadline_next_request(struct deadline_data *dd, int data_dir)
|
||||
{
|
||||
struct request *rq;
|
||||
unsigned long flags;
|
||||
|
||||
if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
|
||||
return NULL;
|
||||
|
||||
rq = dd->next_rq[data_dir];
|
||||
if (!rq)
|
||||
return NULL;
|
||||
|
||||
if (data_dir == READ || !blk_queue_is_zoned(rq->q))
|
||||
return rq;
|
||||
|
||||
/*
|
||||
* Look for a write request that can be dispatched, that is one with
|
||||
* an unlocked target zone.
|
||||
*/
|
||||
spin_lock_irqsave(&dd->zone_lock, flags);
|
||||
while (rq) {
|
||||
if (blk_req_can_dispatch_to_zone(rq))
|
||||
break;
|
||||
rq = deadline_latter_request(rq);
|
||||
}
|
||||
spin_unlock_irqrestore(&dd->zone_lock, flags);
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* deadline_dispatch_requests selects the best request according to
|
||||
* read/write expire, fifo_batch, etc
|
||||
*/
|
||||
static struct request *__dd_dispatch_request(struct deadline_data *dd)
|
||||
{
|
||||
struct request *rq, *next_rq;
|
||||
bool reads, writes;
|
||||
int data_dir;
|
||||
|
||||
if (!list_empty(&dd->dispatch)) {
|
||||
rq = list_first_entry(&dd->dispatch, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
goto done;
|
||||
}
|
||||
|
||||
reads = !list_empty(&dd->fifo_list[READ]);
|
||||
writes = !list_empty(&dd->fifo_list[WRITE]);
|
||||
|
||||
/*
|
||||
* batches are currently reads XOR writes
|
||||
*/
|
||||
rq = deadline_next_request(dd, WRITE);
|
||||
if (!rq)
|
||||
rq = deadline_next_request(dd, READ);
|
||||
|
||||
if (rq && dd->batching < dd->fifo_batch)
|
||||
/* we have a next request are still entitled to batch */
|
||||
goto dispatch_request;
|
||||
|
||||
/*
|
||||
* at this point we are not running a batch. select the appropriate
|
||||
* data direction (read / write)
|
||||
*/
|
||||
|
||||
if (reads) {
|
||||
BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
|
||||
|
||||
if (deadline_fifo_request(dd, WRITE) &&
|
||||
(dd->starved++ >= dd->writes_starved))
|
||||
goto dispatch_writes;
|
||||
|
||||
data_dir = READ;
|
||||
|
||||
goto dispatch_find_request;
|
||||
}
|
||||
|
||||
/*
|
||||
* there are either no reads or writes have been starved
|
||||
*/
|
||||
|
||||
if (writes) {
|
||||
dispatch_writes:
|
||||
BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
|
||||
|
||||
dd->starved = 0;
|
||||
|
||||
data_dir = WRITE;
|
||||
|
||||
goto dispatch_find_request;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
dispatch_find_request:
|
||||
/*
|
||||
* we are not running a batch, find best request for selected data_dir
|
||||
*/
|
||||
next_rq = deadline_next_request(dd, data_dir);
|
||||
if (deadline_check_fifo(dd, data_dir) || !next_rq) {
|
||||
/*
|
||||
* A deadline has expired, the last request was in the other
|
||||
* direction, or we have run out of higher-sectored requests.
|
||||
* Start again from the request with the earliest expiry time.
|
||||
*/
|
||||
rq = deadline_fifo_request(dd, data_dir);
|
||||
} else {
|
||||
/*
|
||||
* The last req was the same dir and we have a next request in
|
||||
* sort order. No expired requests so continue on from here.
|
||||
*/
|
||||
rq = next_rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* For a zoned block device, if we only have writes queued and none of
|
||||
* them can be dispatched, rq will be NULL.
|
||||
*/
|
||||
if (!rq)
|
||||
return NULL;
|
||||
|
||||
dd->batching = 0;
|
||||
|
||||
dispatch_request:
|
||||
/*
|
||||
* rq is the selected appropriate request.
|
||||
*/
|
||||
dd->batching++;
|
||||
deadline_move_request(dd, rq);
|
||||
done:
|
||||
/*
|
||||
* If the request needs its target zone locked, do it.
|
||||
*/
|
||||
blk_req_zone_write_lock(rq);
|
||||
rq->rq_flags |= RQF_STARTED;
|
||||
return rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* One confusing aspect here is that we get called for a specific
|
||||
* hardware queue, but we may return a request that is for a
|
||||
* different hardware queue. This is because mq-deadline has shared
|
||||
* state for all hardware queues, in terms of sorting, FIFOs, etc.
|
||||
*/
|
||||
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
|
||||
struct request *rq;
|
||||
|
||||
spin_lock(&dd->lock);
|
||||
rq = __dd_dispatch_request(dd);
|
||||
spin_unlock(&dd->lock);
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
static void dd_exit_queue(struct elevator_queue *e)
|
||||
{
|
||||
struct deadline_data *dd = e->elevator_data;
|
||||
|
||||
BUG_ON(!list_empty(&dd->fifo_list[READ]));
|
||||
BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
|
||||
|
||||
kfree(dd);
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize elevator private data (deadline_data).
|
||||
*/
|
||||
static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
|
||||
{
|
||||
struct deadline_data *dd;
|
||||
struct elevator_queue *eq;
|
||||
|
||||
eq = elevator_alloc(q, e);
|
||||
if (!eq)
|
||||
return -ENOMEM;
|
||||
|
||||
dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
|
||||
if (!dd) {
|
||||
kobject_put(&eq->kobj);
|
||||
return -ENOMEM;
|
||||
}
|
||||
eq->elevator_data = dd;
|
||||
|
||||
INIT_LIST_HEAD(&dd->fifo_list[READ]);
|
||||
INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
|
||||
dd->sort_list[READ] = RB_ROOT;
|
||||
dd->sort_list[WRITE] = RB_ROOT;
|
||||
dd->fifo_expire[READ] = read_expire;
|
||||
dd->fifo_expire[WRITE] = write_expire;
|
||||
dd->writes_starved = writes_starved;
|
||||
dd->front_merges = 1;
|
||||
dd->fifo_batch = fifo_batch;
|
||||
spin_lock_init(&dd->lock);
|
||||
spin_lock_init(&dd->zone_lock);
|
||||
INIT_LIST_HEAD(&dd->dispatch);
|
||||
|
||||
q->elevator = eq;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dd_request_merge(struct request_queue *q, struct request **rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
sector_t sector = bio_end_sector(bio);
|
||||
struct request *__rq;
|
||||
|
||||
if (!dd->front_merges)
|
||||
return ELEVATOR_NO_MERGE;
|
||||
|
||||
__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
|
||||
if (__rq) {
|
||||
BUG_ON(sector != blk_rq_pos(__rq));
|
||||
|
||||
if (elv_bio_merge_ok(__rq, bio)) {
|
||||
*rq = __rq;
|
||||
return ELEVATOR_FRONT_MERGE;
|
||||
}
|
||||
}
|
||||
|
||||
return ELEVATOR_NO_MERGE;
|
||||
}
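/*
 * Not part of the original file: a concrete front-merge case for the
 * function above.  A bio covering sectors 92..99 has bio_end_sector() == 100;
 * if the sort tree for that direction contains a request starting at
 * sector 100, elv_rb_find() returns it and, provided elv_bio_merge_ok()
 * agrees, the bio is merged at the front of that request.
 */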
|
||||
|
||||
static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs)
|
||||
{
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
struct request *free = NULL;
|
||||
bool ret;
|
||||
|
||||
spin_lock(&dd->lock);
|
||||
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
|
||||
spin_unlock(&dd->lock);
|
||||
|
||||
if (free)
|
||||
blk_mq_free_request(free);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* add rq to rbtree and fifo
|
||||
*/
|
||||
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
const int data_dir = rq_data_dir(rq);
|
||||
|
||||
/*
|
||||
* This may be a requeue of a write request that has locked its
|
||||
* target zone. If it is the case, this releases the zone lock.
|
||||
*/
|
||||
blk_req_zone_write_unlock(rq);
|
||||
|
||||
if (blk_mq_sched_try_insert_merge(q, rq))
|
||||
return;
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
if (at_head) {
|
||||
list_add(&rq->queuelist, &dd->dispatch);
|
||||
} else {
|
||||
deadline_add_rq_rb(dd, rq);
|
||||
|
||||
if (rq_mergeable(rq)) {
|
||||
elv_rqhash_add(q, rq);
|
||||
if (!q->last_merge)
|
||||
q->last_merge = rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* set expire time and add to fifo list
|
||||
*/
|
||||
rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
|
||||
list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
|
||||
}
|
||||
}
|
||||
|
||||
static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list, bool at_head)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
spin_lock(&dd->lock);
|
||||
while (!list_empty(list)) {
|
||||
struct request *rq;
|
||||
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
dd_insert_request(hctx, rq, at_head);
|
||||
}
|
||||
spin_unlock(&dd->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Nothing to do here. This is defined only to ensure that .finish_request
|
||||
* method is called upon request completion.
|
||||
*/
|
||||
static void dd_prepare_request(struct request *rq)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* For zoned block devices, write unlock the target zone of
|
||||
* completed write requests. Do this while holding the zone lock
|
||||
* spinlock so that the zone is never unlocked while deadline_fifo_request()
|
||||
* or deadline_next_request() are executing. This function is called for
|
||||
* all requests, whether or not these requests complete successfully.
|
||||
*
|
||||
* For a zoned block device, __dd_dispatch_request() may have stopped
|
||||
* dispatching requests if all the queued requests are write requests directed
|
||||
* at zones that are already locked due to on-going write requests. To ensure
|
||||
* write request dispatch progress in this case, mark the queue as needing a
|
||||
* restart to ensure that the queue is run again after completion of the
|
||||
* request and zones being unlocked.
|
||||
*/
|
||||
static void dd_finish_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
|
||||
if (blk_queue_is_zoned(q)) {
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dd->zone_lock, flags);
|
||||
blk_req_zone_write_unlock(rq);
|
||||
if (!list_empty(&dd->fifo_list[WRITE]))
|
||||
blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
|
||||
spin_unlock_irqrestore(&dd->zone_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
|
||||
|
||||
return !list_empty_careful(&dd->dispatch) ||
|
||||
!list_empty_careful(&dd->fifo_list[0]) ||
|
||||
!list_empty_careful(&dd->fifo_list[1]);
|
||||
}
|
||||
|
||||
/*
|
||||
* sysfs parts below
|
||||
*/
|
||||
static ssize_t
|
||||
deadline_var_show(int var, char *page)
|
||||
{
|
||||
return sprintf(page, "%d\n", var);
|
||||
}
|
||||
|
||||
static void
|
||||
deadline_var_store(int *var, const char *page)
|
||||
{
|
||||
char *p = (char *) page;
|
||||
|
||||
*var = simple_strtol(p, &p, 10);
|
||||
}
|
||||
|
||||
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
|
||||
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
|
||||
{ \
|
||||
struct deadline_data *dd = e->elevator_data; \
|
||||
int __data = __VAR; \
|
||||
if (__CONV) \
|
||||
__data = jiffies_to_msecs(__data); \
|
||||
return deadline_var_show(__data, (page)); \
|
||||
}
|
||||
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
|
||||
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
|
||||
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
|
||||
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
|
||||
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
|
||||
#undef SHOW_FUNCTION
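/*
 * Not part of the original file: for reference, the first invocation above,
 * SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1),
 * expands to roughly the following:
 */
static ssize_t deadline_read_expire_show__expanded(struct elevator_queue *e,
						   char *page)
{
	struct deadline_data *dd = e->elevator_data;
	int __data = dd->fifo_expire[READ];

	__data = jiffies_to_msecs(__data);	/* because __CONV is 1 */
	return deadline_var_show(__data, page);
}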
|
||||
|
||||
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
|
||||
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
|
||||
{ \
|
||||
struct deadline_data *dd = e->elevator_data; \
|
||||
int __data; \
|
||||
deadline_var_store(&__data, (page)); \
|
||||
if (__data < (MIN)) \
|
||||
__data = (MIN); \
|
||||
else if (__data > (MAX)) \
|
||||
__data = (MAX); \
|
||||
if (__CONV) \
|
||||
*(__PTR) = msecs_to_jiffies(__data); \
|
||||
else \
|
||||
*(__PTR) = __data; \
|
||||
return count; \
|
||||
}
|
||||
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
|
||||
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
|
||||
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
|
||||
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
|
||||
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
|
||||
#undef STORE_FUNCTION
|
||||
|
||||
#define DD_ATTR(name) \
|
||||
__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
|
||||
|
||||
static struct elv_fs_entry deadline_attrs[] = {
|
||||
DD_ATTR(read_expire),
|
||||
DD_ATTR(write_expire),
|
||||
DD_ATTR(writes_starved),
|
||||
DD_ATTR(front_merges),
|
||||
DD_ATTR(fifo_batch),
|
||||
__ATTR_NULL
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BLK_DEBUG_FS
|
||||
#define DEADLINE_DEBUGFS_DDIR_ATTRS(ddir, name) \
|
||||
static void *deadline_##name##_fifo_start(struct seq_file *m, \
|
||||
loff_t *pos) \
|
||||
__acquires(&dd->lock) \
|
||||
{ \
|
||||
struct request_queue *q = m->private; \
|
||||
struct deadline_data *dd = q->elevator->elevator_data; \
|
||||
\
|
||||
spin_lock(&dd->lock); \
|
||||
return seq_list_start(&dd->fifo_list[ddir], *pos); \
|
||||
} \
|
||||
\
|
||||
static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \
|
||||
loff_t *pos) \
|
||||
{ \
|
||||
struct request_queue *q = m->private; \
|
||||
struct deadline_data *dd = q->elevator->elevator_data; \
|
||||
\
|
||||
return seq_list_next(v, &dd->fifo_list[ddir], pos); \
|
||||
} \
|
||||
\
|
||||
static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \
|
||||
__releases(&dd->lock) \
|
||||
{ \
|
||||
struct request_queue *q = m->private; \
|
||||
struct deadline_data *dd = q->elevator->elevator_data; \
|
||||
\
|
||||
spin_unlock(&dd->lock); \
|
||||
} \
|
||||
\
|
||||
static const struct seq_operations deadline_##name##_fifo_seq_ops = { \
|
||||
.start = deadline_##name##_fifo_start, \
|
||||
.next = deadline_##name##_fifo_next, \
|
||||
.stop = deadline_##name##_fifo_stop, \
|
||||
.show = blk_mq_debugfs_rq_show, \
|
||||
}; \
|
||||
\
|
||||
static int deadline_##name##_next_rq_show(void *data, \
|
||||
struct seq_file *m) \
|
||||
{ \
|
||||
struct request_queue *q = data; \
|
||||
struct deadline_data *dd = q->elevator->elevator_data; \
|
||||
struct request *rq = dd->next_rq[ddir]; \
|
||||
\
|
||||
if (rq) \
|
||||
__blk_mq_debugfs_rq_show(m, rq); \
|
||||
return 0; \
|
||||
}
|
||||
DEADLINE_DEBUGFS_DDIR_ATTRS(READ, read)
|
||||
DEADLINE_DEBUGFS_DDIR_ATTRS(WRITE, write)
|
||||
#undef DEADLINE_DEBUGFS_DDIR_ATTRS
|
||||
|
||||
static int deadline_batching_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
seq_printf(m, "%u\n", dd->batching);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int deadline_starved_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
seq_printf(m, "%u\n", dd->starved);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos)
|
||||
__acquires(&dd->lock)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
spin_lock(&dd->lock);
|
||||
return seq_list_start(&dd->dispatch, *pos);
|
||||
}
|
||||
|
||||
static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
return seq_list_next(v, &dd->dispatch, pos);
|
||||
}
|
||||
|
||||
static void deadline_dispatch_stop(struct seq_file *m, void *v)
|
||||
__releases(&dd->lock)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
struct deadline_data *dd = q->elevator->elevator_data;
|
||||
|
||||
spin_unlock(&dd->lock);
|
||||
}
|
||||
|
||||
static const struct seq_operations deadline_dispatch_seq_ops = {
|
||||
.start = deadline_dispatch_start,
|
||||
.next = deadline_dispatch_next,
|
||||
.stop = deadline_dispatch_stop,
|
||||
.show = blk_mq_debugfs_rq_show,
|
||||
};
|
||||
|
||||
#define DEADLINE_QUEUE_DDIR_ATTRS(name) \
|
||||
{#name "_fifo_list", 0400, .seq_ops = &deadline_##name##_fifo_seq_ops}, \
|
||||
{#name "_next_rq", 0400, deadline_##name##_next_rq_show}
|
||||
static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
|
||||
DEADLINE_QUEUE_DDIR_ATTRS(read),
|
||||
DEADLINE_QUEUE_DDIR_ATTRS(write),
|
||||
{"batching", 0400, deadline_batching_show},
|
||||
{"starved", 0400, deadline_starved_show},
|
||||
{"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
|
||||
{},
|
||||
};
|
||||
#undef DEADLINE_QUEUE_DDIR_ATTRS
|
||||
#endif
|
||||
|
||||
static struct elevator_type mq_deadline = {
|
||||
.ops = {
|
||||
.insert_requests = dd_insert_requests,
|
||||
.dispatch_request = dd_dispatch_request,
|
||||
.prepare_request = dd_prepare_request,
|
||||
.finish_request = dd_finish_request,
|
||||
.next_request = elv_rb_latter_request,
|
||||
.former_request = elv_rb_former_request,
|
||||
.bio_merge = dd_bio_merge,
|
||||
.request_merge = dd_request_merge,
|
||||
.requests_merged = dd_merged_requests,
|
||||
.request_merged = dd_request_merged,
|
||||
.has_work = dd_has_work,
|
||||
.init_sched = dd_init_queue,
|
||||
.exit_sched = dd_exit_queue,
|
||||
},
|
||||
|
||||
#ifdef CONFIG_BLK_DEBUG_FS
|
||||
.queue_debugfs_attrs = deadline_queue_debugfs_attrs,
|
||||
#endif
|
||||
.elevator_attrs = deadline_attrs,
|
||||
.elevator_name = "mq-deadline",
|
||||
.elevator_alias = "deadline",
|
||||
.elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
|
||||
.elevator_owner = THIS_MODULE,
|
||||
};
|
||||
MODULE_ALIAS("mq-deadline-iosched");
|
||||
|
||||
static int __init deadline_init(void)
|
||||
{
|
||||
return elv_register(&mq_deadline);
|
||||
}
|
||||
|
||||
static void __exit deadline_exit(void)
|
||||
{
|
||||
elv_unregister(&mq_deadline);
|
||||
}
|
||||
|
||||
module_init(deadline_init);
|
||||
module_exit(deadline_exit);
|
||||
|
||||
MODULE_AUTHOR("Jens Axboe");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("MQ deadline IO scheduler");
|
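The SHOW_FUNCTION/STORE_FUNCTION pairs above surface the mq-deadline tunables as files under /sys/block/<disk>/queue/iosched/, converting the time-based ones between milliseconds and jiffies and clamping stored values to the [MIN, MAX] range given in each STORE_FUNCTION() instantiation. As a rough user-space sketch only, not part of the merged patches, one of those tunables can be read and updated with plain file I/O; the device name "sda" and the 3000 ms value are illustrative assumptions:

/*
 * User-space sketch: read and update the mq-deadline "write_expire"
 * tunable through sysfs.  "sda" and 3000 are assumed, not taken from
 * the patches above.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *attr = "/sys/block/sda/queue/iosched/write_expire";
        char buf[32];
        FILE *f;

        f = fopen(attr, "r");
        if (!f) {
                perror("fopen");
                return EXIT_FAILURE;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("current write_expire: %s", buf);  /* milliseconds */
        fclose(f);

        f = fopen(attr, "w");
        if (!f) {
                perror("fopen");
                return EXIT_FAILURE;
        }
        fprintf(f, "3000\n");  /* stored via msecs_to_jiffies() in STORE_FUNCTION */
        fclose(f);
        return EXIT_SUCCESS;
}
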
@ -120,8 +120,7 @@ static void free_partitions(struct parsed_partitions *state)
|
||||
kfree(state);
|
||||
}
|
||||
|
||||
static struct parsed_partitions *check_partition(struct gendisk *hd,
|
||||
struct block_device *bdev)
|
||||
static struct parsed_partitions *check_partition(struct gendisk *hd)
|
||||
{
|
||||
struct parsed_partitions *state;
|
||||
int i, res, err;
|
||||
@ -136,7 +135,7 @@ static struct parsed_partitions *check_partition(struct gendisk *hd,
|
||||
}
|
||||
state->pp_buf[0] = '\0';
|
||||
|
||||
state->bdev = bdev;
|
||||
state->bdev = hd->part0;
|
||||
disk_name(hd, 0, state->name);
|
||||
snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
|
||||
if (isdigit(state->name[strlen(state->name)-1]))
|
||||
@ -260,7 +259,8 @@ static const struct attribute_group *part_attr_groups[] = {
|
||||
|
||||
static void part_release(struct device *dev)
|
||||
{
|
||||
blk_free_devt(dev->devt);
|
||||
if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
|
||||
blk_free_ext_minor(MINOR(dev->devt));
|
||||
bdput(dev_to_bdev(dev));
|
||||
}
|
||||
|
||||
@ -282,7 +282,7 @@ struct device_type part_type = {
|
||||
};
|
||||
|
||||
/*
|
||||
* Must be called either with bd_mutex held, before a disk can be opened or
|
||||
* Must be called either with open_mutex held, before a disk can be opened or
|
||||
* after all disk users are gone.
|
||||
*/
|
||||
static void delete_partition(struct block_device *part)
|
||||
@ -311,7 +311,7 @@ static ssize_t whole_disk_show(struct device *dev,
|
||||
static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
|
||||
|
||||
/*
|
||||
* Must be called either with bd_mutex held, before a disk can be opened or
|
||||
* Must be called either with open_mutex held, before a disk can be opened or
|
||||
* after all disk users are gone.
|
||||
*/
|
||||
static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
@ -325,10 +325,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
const char *dname;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* disk_max_parts() won't be zero, either GENHD_FL_EXT_DEVT is set
|
||||
* or 'minors' is passed to alloc_disk().
|
||||
*/
|
||||
lockdep_assert_held(&disk->open_mutex);
|
||||
|
||||
if (partno >= disk_max_parts(disk))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
@ -379,9 +377,15 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
|
||||
pdev->type = &part_type;
|
||||
pdev->parent = ddev;
|
||||
|
||||
err = blk_alloc_devt(bdev, &devt);
|
||||
if (err)
|
||||
goto out_put;
|
||||
/* in consecutive minor range? */
|
||||
if (bdev->bd_partno < disk->minors) {
|
||||
devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
|
||||
} else {
|
||||
err = blk_alloc_ext_minor();
|
||||
if (err < 0)
|
||||
goto out_put;
|
||||
devt = MKDEV(BLOCK_EXT_MAJOR, err);
|
||||
}
|
||||
pdev->devt = devt;
|
||||
|
||||
/* delay uevent until 'holders' subdir is created */
|
||||
@ -450,29 +454,27 @@ int bdev_add_partition(struct block_device *bdev, int partno,
|
||||
{
|
||||
struct block_device *part;
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
part = add_partition(bdev->bd_disk, partno, start, length,
|
||||
ADDPART_FLAG_NONE, NULL);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
return PTR_ERR_OR_ZERO(part);
|
||||
}
|
||||
|
||||
int bdev_del_partition(struct block_device *bdev, int partno)
|
||||
{
|
||||
struct block_device *part;
|
||||
int ret;
|
||||
struct block_device *part = NULL;
|
||||
int ret = -ENXIO;
|
||||
|
||||
part = bdget_disk(bdev->bd_disk, partno);
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
part = xa_load(&bdev->bd_disk->part_tbl, partno);
|
||||
if (!part)
|
||||
return -ENXIO;
|
||||
|
||||
mutex_lock(&part->bd_mutex);
|
||||
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||
goto out_unlock;
|
||||
|
||||
ret = -EBUSY;
|
||||
if (part->bd_openers)
|
||||
@ -481,24 +483,21 @@ int bdev_del_partition(struct block_device *bdev, int partno)
|
||||
delete_partition(part);
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&part->bd_mutex);
|
||||
bdput(part);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bdev_resize_partition(struct block_device *bdev, int partno,
|
||||
sector_t start, sector_t length)
|
||||
{
|
||||
struct block_device *part;
|
||||
int ret = 0;
|
||||
struct block_device *part = NULL;
|
||||
int ret = -ENXIO;
|
||||
|
||||
part = bdget_disk(bdev->bd_disk, partno);
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
part = xa_load(&bdev->bd_disk->part_tbl, partno);
|
||||
if (!part)
|
||||
return -ENXIO;
|
||||
goto out_unlock;
|
||||
|
||||
mutex_lock(&part->bd_mutex);
|
||||
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||
ret = -EINVAL;
|
||||
if (start != part->bd_start_sect)
|
||||
goto out_unlock;
|
||||
@ -511,9 +510,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
|
||||
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&part->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
bdput(part);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -538,7 +535,7 @@ void blk_drop_partitions(struct gendisk *disk)
|
||||
struct block_device *part;
|
||||
unsigned long idx;
|
||||
|
||||
lockdep_assert_held(&disk->part0->bd_mutex);
|
||||
lockdep_assert_held(&disk->open_mutex);
|
||||
|
||||
xa_for_each_start(&disk->part_tbl, idx, part, 1) {
|
||||
if (!bdgrab(part))
|
||||
@ -548,7 +545,7 @@ void blk_drop_partitions(struct gendisk *disk)
|
||||
}
|
||||
}
|
||||
|
||||
static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
|
||||
static bool blk_add_partition(struct gendisk *disk,
|
||||
struct parsed_partitions *state, int p)
|
||||
{
|
||||
sector_t size = state->parts[p].size;
|
||||
@ -598,7 +595,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
|
||||
return true;
|
||||
}
|
||||
|
||||
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
|
||||
static int blk_add_partitions(struct gendisk *disk)
|
||||
{
|
||||
struct parsed_partitions *state;
|
||||
int ret = -EAGAIN, p;
|
||||
@ -606,7 +603,7 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
|
||||
if (!disk_part_scan_enabled(disk))
|
||||
return 0;
|
||||
|
||||
state = check_partition(disk, bdev);
|
||||
state = check_partition(disk);
|
||||
if (!state)
|
||||
return 0;
|
||||
if (IS_ERR(state)) {
|
||||
@ -650,7 +647,7 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
|
||||
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
|
||||
|
||||
for (p = 1; p < state->limit; p++)
|
||||
if (!blk_add_partition(disk, bdev, state, p))
|
||||
if (!blk_add_partition(disk, state, p))
|
||||
goto out_free_state;
|
||||
|
||||
ret = 0;
|
||||
@ -659,6 +656,58 @@ out_free_state:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bdev_disk_changed(struct gendisk *disk, bool invalidate)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&disk->open_mutex);
|
||||
|
||||
if (!(disk->flags & GENHD_FL_UP))
|
||||
return -ENXIO;
|
||||
|
||||
rescan:
|
||||
if (disk->open_partitions)
|
||||
return -EBUSY;
|
||||
sync_blockdev(disk->part0);
|
||||
invalidate_bdev(disk->part0);
|
||||
blk_drop_partitions(disk);
|
||||
|
||||
clear_bit(GD_NEED_PART_SCAN, &disk->state);
|
||||
|
||||
/*
|
||||
* Historically we only set the capacity to zero for devices that
|
||||
* support partitions (independ of actually having partitions created).
|
||||
* Doing that is rather inconsistent, but changing it broke legacy
|
||||
* udisks polling for legacy ide-cdrom devices. Use the crude check
|
||||
* below to get the sane behavior for most device while not breaking
|
||||
* userspace for this particular setup.
|
||||
*/
|
||||
if (invalidate) {
|
||||
if (disk_part_scan_enabled(disk) ||
|
||||
!(disk->flags & GENHD_FL_REMOVABLE))
|
||||
set_capacity(disk, 0);
|
||||
}
|
||||
|
||||
if (get_capacity(disk)) {
|
||||
ret = blk_add_partitions(disk);
|
||||
if (ret == -EAGAIN)
|
||||
goto rescan;
|
||||
} else if (invalidate) {
|
||||
/*
|
||||
* Tell userspace that the media / partition table may have
|
||||
* changed.
|
||||
*/
|
||||
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
/*
|
||||
* Only exported for loop and dasd for historic reasons. Don't use in new
|
||||
* code!
|
||||
*/
|
||||
EXPORT_SYMBOL_GPL(bdev_disk_changed);
|
||||
|
||||
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
|
||||
{
|
||||
struct address_space *mapping = state->bdev->bd_inode->i_mapping;
|
||||
|
@ -622,7 +622,7 @@ int msdos_partition(struct parsed_partitions *state)
|
||||
for (slot = 1; slot <= 4; slot++, p++) {
|
||||
if (p->boot_ind != 0 && p->boot_ind != 0x80) {
|
||||
/*
|
||||
* Even without a valid boot inidicator value
|
||||
* Even without a valid boot indicator value
|
||||
* its still possible this is valid FAT filesystem
|
||||
* without a partition table.
|
||||
*/
|
||||
|
@ -1781,15 +1781,13 @@ static int fd_alloc_disk(int drive, int system)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (!disk)
|
||||
goto out;
|
||||
disk->queue = blk_mq_init_queue(&unit[drive].tag_set);
|
||||
if (IS_ERR(disk->queue))
|
||||
goto out_put_disk;
|
||||
disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
disk->major = FLOPPY_MAJOR;
|
||||
disk->first_minor = drive + system;
|
||||
disk->minors = 1;
|
||||
disk->fops = &floppy_fops;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
if (system)
|
||||
@ -1802,12 +1800,6 @@ static int fd_alloc_disk(int drive, int system)
|
||||
unit[drive].gendisk[system] = disk;
|
||||
add_disk(disk);
|
||||
return 0;
|
||||
|
||||
out_put_disk:
|
||||
disk->queue = NULL;
|
||||
put_disk(disk);
|
||||
out:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int fd_alloc_drive(int drive)
|
||||
|
@ -338,14 +338,13 @@ static const struct blk_mq_ops aoeblk_mq_ops = {
|
||||
.queue_rq = aoeblk_queue_rq,
|
||||
};
|
||||
|
||||
/* alloc_disk and add_disk can sleep */
|
||||
/* blk_mq_alloc_disk and add_disk can sleep */
|
||||
void
|
||||
aoeblk_gdalloc(void *vp)
|
||||
{
|
||||
struct aoedev *d = vp;
|
||||
struct gendisk *gd;
|
||||
mempool_t *mp;
|
||||
struct request_queue *q;
|
||||
struct blk_mq_tag_set *set;
|
||||
ulong flags;
|
||||
int late = 0;
|
||||
@ -362,19 +361,12 @@ aoeblk_gdalloc(void *vp)
|
||||
if (late)
|
||||
return;
|
||||
|
||||
gd = alloc_disk(AOE_PARTITIONS);
|
||||
if (gd == NULL) {
|
||||
pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
|
||||
d->aoemajor, d->aoeminor);
|
||||
goto err;
|
||||
}
|
||||
|
||||
mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
|
||||
buf_pool_cache);
|
||||
if (mp == NULL) {
|
||||
printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
|
||||
d->aoemajor, d->aoeminor);
|
||||
goto err_disk;
|
||||
goto err;
|
||||
}
|
||||
|
||||
set = &d->tag_set;
|
||||
@ -391,12 +383,11 @@ aoeblk_gdalloc(void *vp)
|
||||
goto err_mempool;
|
||||
}
|
||||
|
||||
q = blk_mq_init_queue(set);
|
||||
if (IS_ERR(q)) {
|
||||
gd = blk_mq_alloc_disk(set, d);
|
||||
if (IS_ERR(gd)) {
|
||||
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
|
||||
d->aoemajor, d->aoeminor);
|
||||
blk_mq_free_tag_set(set);
|
||||
goto err_mempool;
|
||||
goto err_tagset;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&d->lock, flags);
|
||||
@ -405,16 +396,16 @@ aoeblk_gdalloc(void *vp)
|
||||
WARN_ON(d->flags & DEVFL_TKILL);
|
||||
WARN_ON(d->gd);
|
||||
WARN_ON(d->flags & DEVFL_UP);
|
||||
blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
|
||||
blk_queue_io_opt(q, SZ_2M);
|
||||
blk_queue_max_hw_sectors(gd->queue, BLK_DEF_MAX_SECTORS);
|
||||
blk_queue_io_opt(gd->queue, SZ_2M);
|
||||
d->bufpool = mp;
|
||||
d->blkq = gd->queue = q;
|
||||
q->queuedata = d;
|
||||
d->blkq = gd->queue;
|
||||
d->gd = gd;
|
||||
if (aoe_maxsectors)
|
||||
blk_queue_max_hw_sectors(q, aoe_maxsectors);
|
||||
blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors);
|
||||
gd->major = AOE_MAJOR;
|
||||
gd->first_minor = d->sysminor;
|
||||
gd->minors = AOE_PARTITIONS;
|
||||
gd->fops = &aoe_bdops;
|
||||
gd->private_data = d;
|
||||
set_capacity(gd, d->ssize);
|
||||
@ -435,10 +426,10 @@ aoeblk_gdalloc(void *vp)
|
||||
spin_unlock_irqrestore(&d->lock, flags);
|
||||
return;
|
||||
|
||||
err_tagset:
|
||||
blk_mq_free_tag_set(set);
|
||||
err_mempool:
|
||||
mempool_destroy(mp);
|
||||
err_disk:
|
||||
put_disk(gd);
|
||||
err:
|
||||
spin_lock_irqsave(&d->lock, flags);
|
||||
d->flags &= ~DEVFL_GD_NOW;
|
||||
|
@ -1701,8 +1701,6 @@ aoecmd_init(void)
|
||||
goto ktiowq_fail;
|
||||
}
|
||||
|
||||
mutex_init(&ktio_spawn_lock);
|
||||
|
||||
for (i = 0; i < ncpus; i++) {
|
||||
INIT_LIST_HEAD(&iocq[i].head);
|
||||
spin_lock_init(&iocq[i].lock);
|
||||
|
@ -277,9 +277,8 @@ freedev(struct aoedev *d)
|
||||
if (d->gd) {
|
||||
aoedisk_rm_debugfs(d);
|
||||
del_gendisk(d->gd);
|
||||
put_disk(d->gd);
|
||||
blk_cleanup_disk(d->gd);
|
||||
blk_mq_free_tag_set(&d->tag_set);
|
||||
blk_cleanup_queue(d->blkq);
|
||||
}
|
||||
t = d->targets;
|
||||
e = t + d->ntargets;
|
||||
|
@ -1968,22 +1968,14 @@ static const struct blk_mq_ops ataflop_mq_ops = {
|
||||
static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
int ret;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
|
||||
disk->queue = blk_mq_init_queue(&unit[drive].tag_set);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
ret = PTR_ERR(disk->queue);
|
||||
disk->queue = NULL;
|
||||
put_disk(disk);
|
||||
return ret;
|
||||
}
|
||||
disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
disk->major = FLOPPY_MAJOR;
|
||||
disk->first_minor = drive + (type << 2);
|
||||
disk->minors = 1;
|
||||
sprintf(disk->disk_name, "fd%d", drive);
|
||||
disk->fops = &floppy_fops;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
|
@ -38,9 +38,7 @@
|
||||
* device).
|
||||
*/
|
||||
struct brd_device {
|
||||
int brd_number;
|
||||
|
||||
struct request_queue *brd_queue;
|
||||
int brd_number;
|
||||
struct gendisk *brd_disk;
|
||||
struct list_head brd_list;
|
||||
|
||||
@ -372,7 +370,7 @@ static LIST_HEAD(brd_devices);
|
||||
static DEFINE_MUTEX(brd_devices_mutex);
|
||||
static struct dentry *brd_debugfs_dir;
|
||||
|
||||
static struct brd_device *brd_alloc(int i)
|
||||
static int brd_alloc(int i)
|
||||
{
|
||||
struct brd_device *brd;
|
||||
struct gendisk *disk;
|
||||
@ -380,64 +378,55 @@ static struct brd_device *brd_alloc(int i)
|
||||
|
||||
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
|
||||
if (!brd)
|
||||
goto out;
|
||||
return -ENOMEM;
|
||||
brd->brd_number = i;
|
||||
spin_lock_init(&brd->brd_lock);
|
||||
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
|
||||
|
||||
brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!brd->brd_queue)
|
||||
goto out_free_dev;
|
||||
|
||||
snprintf(buf, DISK_NAME_LEN, "ram%d", i);
|
||||
if (!IS_ERR_OR_NULL(brd_debugfs_dir))
|
||||
debugfs_create_u64(buf, 0444, brd_debugfs_dir,
|
||||
&brd->brd_nr_pages);
|
||||
|
||||
/* This is so fdisk will align partitions on 4k, because of
|
||||
* direct_access API needing 4k alignment, returning a PFN
|
||||
* (This is only a problem on very small devices <= 4M,
|
||||
* otherwise fdisk will align on 1M. Regardless this call
|
||||
* is harmless)
|
||||
*/
|
||||
blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
|
||||
disk = brd->brd_disk = alloc_disk(max_part);
|
||||
disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
goto out_free_queue;
|
||||
goto out_free_dev;
|
||||
|
||||
disk->major = RAMDISK_MAJOR;
|
||||
disk->first_minor = i * max_part;
|
||||
disk->minors = max_part;
|
||||
disk->fops = &brd_fops;
|
||||
disk->private_data = brd;
|
||||
disk->flags = GENHD_FL_EXT_DEVT;
|
||||
strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
|
||||
set_capacity(disk, rd_size * 2);
|
||||
|
||||
/*
|
||||
* This is so fdisk will align partitions on 4k, because of
|
||||
* direct_access API needing 4k alignment, returning a PFN
|
||||
* (This is only a problem on very small devices <= 4M,
|
||||
* otherwise fdisk will align on 1M. Regardless this call
|
||||
* is harmless)
|
||||
*/
|
||||
blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
|
||||
|
||||
/* Tell the block layer that this is not a rotational device */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
|
||||
add_disk(disk);
|
||||
list_add_tail(&brd->brd_list, &brd_devices);
|
||||
|
||||
return brd;
|
||||
return 0;
|
||||
|
||||
out_free_queue:
|
||||
blk_cleanup_queue(brd->brd_queue);
|
||||
out_free_dev:
|
||||
kfree(brd);
|
||||
out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void brd_free(struct brd_device *brd)
|
||||
{
|
||||
put_disk(brd->brd_disk);
|
||||
blk_cleanup_queue(brd->brd_queue);
|
||||
brd_free_pages(brd);
|
||||
kfree(brd);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void brd_probe(dev_t dev)
|
||||
{
|
||||
struct brd_device *brd;
|
||||
int i = MINOR(dev) / max_part;
|
||||
struct brd_device *brd;
|
||||
|
||||
mutex_lock(&brd_devices_mutex);
|
||||
list_for_each_entry(brd, &brd_devices, brd_list) {
|
||||
@ -445,13 +434,7 @@ static void brd_probe(dev_t dev)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
brd = brd_alloc(i);
|
||||
if (brd) {
|
||||
brd->brd_disk->queue = brd->brd_queue;
|
||||
add_disk(brd->brd_disk);
|
||||
list_add_tail(&brd->brd_list, &brd_devices);
|
||||
}
|
||||
|
||||
brd_alloc(i);
|
||||
out_unlock:
|
||||
mutex_unlock(&brd_devices_mutex);
|
||||
}
|
||||
@ -460,7 +443,9 @@ static void brd_del_one(struct brd_device *brd)
|
||||
{
|
||||
list_del(&brd->brd_list);
|
||||
del_gendisk(brd->brd_disk);
|
||||
brd_free(brd);
|
||||
blk_cleanup_disk(brd->brd_disk);
|
||||
brd_free_pages(brd);
|
||||
kfree(brd);
|
||||
}
|
||||
|
||||
static inline void brd_check_and_reset_par(void)
|
||||
@ -485,7 +470,7 @@ static inline void brd_check_and_reset_par(void)
|
||||
static int __init brd_init(void)
|
||||
{
|
||||
struct brd_device *brd, *next;
|
||||
int i;
|
||||
int err, i;
|
||||
|
||||
/*
|
||||
* brd module now has a feature to instantiate underlying device
|
||||
@ -511,22 +496,11 @@ static int __init brd_init(void)
|
||||
|
||||
mutex_lock(&brd_devices_mutex);
|
||||
for (i = 0; i < rd_nr; i++) {
|
||||
brd = brd_alloc(i);
|
||||
if (!brd)
|
||||
err = brd_alloc(i);
|
||||
if (err)
|
||||
goto out_free;
|
||||
list_add_tail(&brd->brd_list, &brd_devices);
|
||||
}
|
||||
|
||||
/* point of no return */
|
||||
|
||||
list_for_each_entry(brd, &brd_devices, brd_list) {
|
||||
/*
|
||||
* associate with queue just before adding disk for
|
||||
* avoiding to mess up failure path
|
||||
*/
|
||||
brd->brd_disk->queue = brd->brd_queue;
|
||||
add_disk(brd->brd_disk);
|
||||
}
|
||||
mutex_unlock(&brd_devices_mutex);
|
||||
|
||||
pr_info("brd: module loaded\n");
|
||||
@ -535,15 +509,13 @@ static int __init brd_init(void)
|
||||
out_free:
|
||||
debugfs_remove_recursive(brd_debugfs_dir);
|
||||
|
||||
list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
|
||||
list_del(&brd->brd_list);
|
||||
brd_free(brd);
|
||||
}
|
||||
list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
|
||||
brd_del_one(brd);
|
||||
mutex_unlock(&brd_devices_mutex);
|
||||
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
|
||||
|
||||
pr_info("brd: module NOT loaded !!!\n");
|
||||
return -ENOMEM;
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __exit brd_exit(void)
|
||||
|
@ -2231,8 +2231,7 @@ void drbd_destroy_device(struct kref *kref)
|
||||
if (device->bitmap) /* should no longer be there. */
|
||||
drbd_bm_cleanup(device);
|
||||
__free_page(device->md_io.page);
|
||||
put_disk(device->vdisk);
|
||||
blk_cleanup_queue(device->rq_queue);
|
||||
blk_cleanup_disk(device->vdisk);
|
||||
kfree(device->rs_plan_s);
|
||||
|
||||
/* not for_each_connection(connection, resource):
|
||||
@ -2701,7 +2700,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
struct drbd_device *device;
|
||||
struct drbd_peer_device *peer_device, *tmp_peer_device;
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
int id;
|
||||
int vnr = adm_ctx->volume;
|
||||
enum drbd_ret_code err = ERR_NOMEM;
|
||||
@ -2723,29 +2721,26 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
|
||||
drbd_init_set_defaults(device);
|
||||
|
||||
q = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!q)
|
||||
goto out_no_q;
|
||||
device->rq_queue = q;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
goto out_no_disk;
|
||||
|
||||
device->vdisk = disk;
|
||||
device->rq_queue = disk->queue;
|
||||
|
||||
set_disk_ro(disk, true);
|
||||
|
||||
disk->queue = q;
|
||||
disk->major = DRBD_MAJOR;
|
||||
disk->first_minor = minor;
|
||||
disk->minors = 1;
|
||||
disk->fops = &drbd_ops;
|
||||
sprintf(disk->disk_name, "drbd%d", minor);
|
||||
disk->private_data = device;
|
||||
|
||||
blk_queue_write_cache(q, true, true);
|
||||
blk_queue_write_cache(disk->queue, true, true);
|
||||
/* Setting the max_hw_sectors to an odd value of 8kibyte here
|
||||
This triggers a max_bio_size message upon first attach or connect */
|
||||
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
|
||||
blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8);
|
||||
|
||||
device->md_io.page = alloc_page(GFP_KERNEL);
|
||||
if (!device->md_io.page)
|
||||
@ -2834,10 +2829,8 @@ out_no_minor_idr:
|
||||
out_no_bitmap:
|
||||
__free_page(device->md_io.page);
|
||||
out_no_io_page:
|
||||
put_disk(disk);
|
||||
blk_cleanup_disk(disk);
|
||||
out_no_disk:
|
||||
blk_cleanup_queue(q);
|
||||
out_no_q:
|
||||
kref_put(&resource->kref, drbd_destroy_resource);
|
||||
kfree(device);
|
||||
return err;
|
||||
|
@ -4491,23 +4491,15 @@ static bool floppy_available(int drive)
|
||||
static int floppy_alloc_disk(unsigned int drive, unsigned int type)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
int err;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
|
||||
disk->queue = blk_mq_init_queue(&tag_sets[drive]);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
err = PTR_ERR(disk->queue);
|
||||
disk->queue = NULL;
|
||||
put_disk(disk);
|
||||
return err;
|
||||
}
|
||||
disk = blk_mq_alloc_disk(&tag_sets[drive], NULL);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
blk_queue_max_hw_sectors(disk->queue, 64);
|
||||
disk->major = FLOPPY_MAJOR;
|
||||
disk->first_minor = TOMINOR(drive) | (type << 2);
|
||||
disk->minors = 1;
|
||||
disk->fops = &floppy_fops;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
if (type)
|
||||
@ -4727,10 +4719,8 @@ out_put_disk:
|
||||
if (!disks[drive][0])
|
||||
break;
|
||||
del_timer_sync(&motor_off_timer[drive]);
|
||||
blk_cleanup_queue(disks[drive][0]->queue);
|
||||
disks[drive][0]->queue = NULL;
|
||||
blk_cleanup_disk(disks[drive][0]);
|
||||
blk_mq_free_tag_set(&tag_sets[drive]);
|
||||
put_disk(disks[drive][0]);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
@ -644,14 +644,13 @@ static inline void loop_update_dio(struct loop_device *lo)
|
||||
lo->use_dio);
|
||||
}
|
||||
|
||||
static void loop_reread_partitions(struct loop_device *lo,
|
||||
struct block_device *bdev)
|
||||
static void loop_reread_partitions(struct loop_device *lo)
|
||||
{
|
||||
int rc;
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
rc = bdev_disk_changed(bdev, false);
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_lock(&lo->lo_disk->open_mutex);
|
||||
rc = bdev_disk_changed(lo->lo_disk, false);
|
||||
mutex_unlock(&lo->lo_disk->open_mutex);
|
||||
if (rc)
|
||||
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
|
||||
__func__, lo->lo_number, lo->lo_file_name, rc);
|
||||
@ -744,12 +743,12 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
/*
|
||||
* We must drop file reference outside of lo_mutex as dropping
|
||||
* the file ref can take bd_mutex which creates circular locking
|
||||
* the file ref can take open_mutex which creates circular locking
|
||||
* dependency.
|
||||
*/
|
||||
fput(old_file);
|
||||
if (partscan)
|
||||
loop_reread_partitions(lo, bdev);
|
||||
loop_reread_partitions(lo);
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
@ -1255,7 +1254,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
bdgrab(bdev);
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
if (partscan)
|
||||
loop_reread_partitions(lo, bdev);
|
||||
loop_reread_partitions(lo);
|
||||
if (!(mode & FMODE_EXCL))
|
||||
bd_abort_claiming(bdev, loop_configure);
|
||||
return 0;
|
||||
@ -1353,7 +1352,7 @@ out_unlock:
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
if (partscan) {
|
||||
/*
|
||||
* bd_mutex has been held already in release path, so don't
|
||||
* open_mutex has been held already in release path, so don't
|
||||
* acquire it if this function is called in such case.
|
||||
*
|
||||
* If the reread partition isn't from release path, lo_refcnt
|
||||
@ -1361,10 +1360,10 @@ out_unlock:
|
||||
* current holder is released.
|
||||
*/
|
||||
if (!release)
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
err = bdev_disk_changed(bdev, false);
|
||||
mutex_lock(&lo->lo_disk->open_mutex);
|
||||
err = bdev_disk_changed(lo->lo_disk, false);
|
||||
if (!release)
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&lo->lo_disk->open_mutex);
|
||||
if (err)
|
||||
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
|
||||
__func__, lo_number, err);
|
||||
@ -1391,7 +1390,7 @@ out_unlock:
|
||||
/*
|
||||
* Need not hold lo_mutex to fput backing file. Calling fput holding
|
||||
* lo_mutex triggers a circular lock dependency possibility warning as
|
||||
* fput can take bd_mutex which is usually taken before lo_mutex.
|
||||
* fput can take open_mutex which is usually taken before lo_mutex.
|
||||
*/
|
||||
if (filp)
|
||||
fput(filp);
|
||||
@ -1509,7 +1508,7 @@ out_unfreeze:
|
||||
out_unlock:
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
if (partscan)
|
||||
loop_reread_partitions(lo, bdev);
|
||||
loop_reread_partitions(lo);
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -2275,12 +2274,12 @@ static int loop_add(struct loop_device **l, int i)
|
||||
if (err)
|
||||
goto out_free_idr;
|
||||
|
||||
lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
|
||||
if (IS_ERR(lo->lo_queue)) {
|
||||
err = PTR_ERR(lo->lo_queue);
|
||||
disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
lo->lo_queue->queuedata = lo;
|
||||
lo->lo_queue = lo->lo_disk->queue;
|
||||
|
||||
blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
|
||||
|
||||
@ -2292,11 +2291,6 @@ static int loop_add(struct loop_device **l, int i)
|
||||
*/
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
|
||||
|
||||
err = -ENOMEM;
|
||||
disk = lo->lo_disk = alloc_disk(1 << part_shift);
|
||||
if (!disk)
|
||||
goto out_free_queue;
|
||||
|
||||
/*
|
||||
* Disable partition scanning by default. The in-kernel partition
|
||||
* scanning can be requested individually per-device during its
|
||||
@ -2325,6 +2319,7 @@ static int loop_add(struct loop_device **l, int i)
|
||||
spin_lock_init(&lo->lo_work_lock);
|
||||
disk->major = LOOP_MAJOR;
|
||||
disk->first_minor = i << part_shift;
|
||||
disk->minors = 1 << part_shift;
|
||||
disk->fops = &lo_fops;
|
||||
disk->private_data = lo;
|
||||
disk->queue = lo->lo_queue;
|
||||
@ -2333,8 +2328,6 @@ static int loop_add(struct loop_device **l, int i)
|
||||
*l = lo;
|
||||
return lo->lo_number;
|
||||
|
||||
out_free_queue:
|
||||
blk_cleanup_queue(lo->lo_queue);
|
||||
out_cleanup_tags:
|
||||
blk_mq_free_tag_set(&lo->tag_set);
|
||||
out_free_idr:
|
||||
@ -2348,9 +2341,8 @@ out:
|
||||
static void loop_remove(struct loop_device *lo)
|
||||
{
|
||||
del_gendisk(lo->lo_disk);
|
||||
blk_cleanup_queue(lo->lo_queue);
|
||||
blk_cleanup_disk(lo->lo_disk);
|
||||
blk_mq_free_tag_set(&lo->tag_set);
|
||||
put_disk(lo->lo_disk);
|
||||
mutex_destroy(&lo->lo_mutex);
|
||||
kfree(lo);
|
||||
}
|
||||
|
@ -132,16 +132,12 @@ static int __init n64cart_probe(struct platform_device *pdev)
|
||||
if (!reg_base)
|
||||
return -EINVAL;
|
||||
|
||||
disk = alloc_disk(0);
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
|
||||
disk->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!disk->queue)
|
||||
return -ENOMEM;
|
||||
|
||||
disk->first_minor = 0;
|
||||
disk->flags = GENHD_FL_NO_PART_SCAN | GENHD_FL_EXT_DEVT;
|
||||
disk->flags = GENHD_FL_NO_PART_SCAN;
|
||||
disk->fops = &n64cart_fops;
|
||||
disk->private_data = &pdev->dev;
|
||||
strcpy(disk->disk_name, "n64cart");
|
||||
|
@ -219,15 +219,11 @@ static const struct device_attribute pid_attr = {
|
||||
static void nbd_dev_remove(struct nbd_device *nbd)
|
||||
{
|
||||
struct gendisk *disk = nbd->disk;
|
||||
struct request_queue *q;
|
||||
|
||||
if (disk) {
|
||||
q = disk->queue;
|
||||
del_gendisk(disk);
|
||||
blk_cleanup_queue(q);
|
||||
blk_mq_free_tag_set(&nbd->tag_set);
|
||||
disk->private_data = NULL;
|
||||
put_disk(disk);
|
||||
blk_cleanup_disk(disk);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1646,15 +1642,24 @@ static int nbd_dev_add(int index)
|
||||
{
|
||||
struct nbd_device *nbd;
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
int err = -ENOMEM;
|
||||
|
||||
nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
|
||||
if (!nbd)
|
||||
goto out;
|
||||
|
||||
disk = alloc_disk(1 << part_shift);
|
||||
if (!disk)
|
||||
nbd->tag_set.ops = &nbd_mq_ops;
|
||||
nbd->tag_set.nr_hw_queues = 1;
|
||||
nbd->tag_set.queue_depth = 128;
|
||||
nbd->tag_set.numa_node = NUMA_NO_NODE;
|
||||
nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
|
||||
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
|
||||
BLK_MQ_F_BLOCKING;
|
||||
nbd->tag_set.driver_data = nbd;
|
||||
nbd->destroy_complete = NULL;
|
||||
|
||||
err = blk_mq_alloc_tag_set(&nbd->tag_set);
|
||||
if (err)
|
||||
goto out_free_nbd;
|
||||
|
||||
if (index >= 0) {
|
||||
@ -1668,30 +1673,15 @@ static int nbd_dev_add(int index)
|
||||
index = err;
|
||||
}
|
||||
if (err < 0)
|
||||
goto out_free_disk;
|
||||
|
||||
nbd->index = index;
|
||||
nbd->disk = disk;
|
||||
nbd->tag_set.ops = &nbd_mq_ops;
|
||||
nbd->tag_set.nr_hw_queues = 1;
|
||||
nbd->tag_set.queue_depth = 128;
|
||||
nbd->tag_set.numa_node = NUMA_NO_NODE;
|
||||
nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
|
||||
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
|
||||
BLK_MQ_F_BLOCKING;
|
||||
nbd->tag_set.driver_data = nbd;
|
||||
nbd->destroy_complete = NULL;
|
||||
|
||||
err = blk_mq_alloc_tag_set(&nbd->tag_set);
|
||||
if (err)
|
||||
goto out_free_idr;
|
||||
|
||||
q = blk_mq_init_queue(&nbd->tag_set);
|
||||
if (IS_ERR(q)) {
|
||||
err = PTR_ERR(q);
|
||||
goto out_free_tags;
|
||||
nbd->index = index;
|
||||
|
||||
disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_free_idr;
|
||||
}
|
||||
disk->queue = q;
|
||||
nbd->disk = disk;
|
||||
|
||||
/*
|
||||
* Tell the block layer that we are not a rotational device
|
||||
@ -1712,6 +1702,7 @@ static int nbd_dev_add(int index)
|
||||
INIT_LIST_HEAD(&nbd->list);
|
||||
disk->major = NBD_MAJOR;
|
||||
disk->first_minor = index << part_shift;
|
||||
disk->minors = 1 << part_shift;
|
||||
disk->fops = &nbd_fops;
|
||||
disk->private_data = nbd;
|
||||
sprintf(disk->disk_name, "nbd%d", index);
|
||||
@ -1719,12 +1710,10 @@ static int nbd_dev_add(int index)
|
||||
nbd_total_devices++;
|
||||
return index;
|
||||
|
||||
out_free_tags:
|
||||
blk_mq_free_tag_set(&nbd->tag_set);
|
||||
out_free_idr:
|
||||
idr_remove(&nbd_index_idr, index);
|
||||
out_free_disk:
|
||||
put_disk(disk);
|
||||
out_free_tags:
|
||||
blk_mq_free_tag_set(&nbd->tag_set);
|
||||
out_free_nbd:
|
||||
kfree(nbd);
|
||||
out:
|
||||
|
@ -1597,11 +1597,10 @@ static void null_del_dev(struct nullb *nullb)
|
||||
null_restart_queue_async(nullb);
|
||||
}
|
||||
|
||||
blk_cleanup_queue(nullb->q);
|
||||
blk_cleanup_disk(nullb->disk);
|
||||
if (dev->queue_mode == NULL_Q_MQ &&
|
||||
nullb->tag_set == &nullb->__tag_set)
|
||||
blk_mq_free_tag_set(nullb->tag_set);
|
||||
put_disk(nullb->disk);
|
||||
cleanup_queues(nullb);
|
||||
if (null_cache_active(nullb))
|
||||
null_free_device_storage(nullb->dev, true);
|
||||
@ -1700,22 +1699,19 @@ static int init_driver_queues(struct nullb *nullb)
|
||||
static int null_gendisk_register(struct nullb *nullb)
|
||||
{
|
||||
sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
|
||||
struct gendisk *disk;
|
||||
struct gendisk *disk = nullb->disk;
|
||||
|
||||
disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
set_capacity(disk, size);
|
||||
|
||||
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
|
||||
disk->major = null_major;
|
||||
disk->first_minor = nullb->index;
|
||||
disk->minors = 1;
|
||||
if (queue_is_mq(nullb->q))
|
||||
disk->fops = &null_rq_ops;
|
||||
else
|
||||
disk->fops = &null_bio_ops;
|
||||
disk->private_data = nullb;
|
||||
disk->queue = nullb->q;
|
||||
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
|
||||
|
||||
if (nullb->dev->zoned) {
|
||||
@ -1851,23 +1847,26 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
goto out_cleanup_queues;
|
||||
|
||||
if (!null_setup_fault())
|
||||
goto out_cleanup_queues;
|
||||
goto out_cleanup_tags;
|
||||
|
||||
rv = -ENOMEM;
|
||||
nullb->tag_set->timeout = 5 * HZ;
|
||||
nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb);
|
||||
if (IS_ERR(nullb->q)) {
|
||||
rv = -ENOMEM;
|
||||
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
|
||||
if (IS_ERR(nullb->disk)) {
|
||||
rv = PTR_ERR(nullb->disk);
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
nullb->q = nullb->disk->queue;
|
||||
} else if (dev->queue_mode == NULL_Q_BIO) {
|
||||
nullb->q = blk_alloc_queue(dev->home_node);
|
||||
if (!nullb->q) {
|
||||
rv = -ENOMEM;
|
||||
rv = -ENOMEM;
|
||||
nullb->disk = blk_alloc_disk(nullb->dev->home_node);
|
||||
if (!nullb->disk)
|
||||
goto out_cleanup_queues;
|
||||
}
|
||||
|
||||
nullb->q = nullb->disk->queue;
|
||||
rv = init_driver_queues(nullb);
|
||||
if (rv)
|
||||
goto out_cleanup_blk_queue;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
if (dev->mbps) {
|
||||
@ -1883,7 +1882,7 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
if (dev->zoned) {
|
||||
rv = null_init_zoned_dev(dev, nullb->q);
|
||||
if (rv)
|
||||
goto out_cleanup_blk_queue;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
||||
nullb->q->queuedata = nullb;
|
||||
@ -1921,8 +1920,8 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
return 0;
|
||||
out_cleanup_zone:
|
||||
null_free_zoned_dev(dev);
|
||||
out_cleanup_blk_queue:
|
||||
blk_cleanup_queue(nullb->q);
|
||||
out_cleanup_disk:
|
||||
blk_cleanup_disk(nullb->disk);
|
||||
out_cleanup_tags:
|
||||
if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
|
||||
blk_mq_free_tag_set(nullb->tag_set);
|
||||
|
@ -309,21 +309,19 @@ static void pcd_init_units(void)
|
||||
|
||||
pcd_drive_count = 0;
|
||||
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
|
||||
struct gendisk *disk = alloc_disk(1);
|
||||
struct gendisk *disk;
|
||||
|
||||
if (!disk)
|
||||
if (blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1,
|
||||
BLK_MQ_F_SHOULD_MERGE))
|
||||
continue;
|
||||
|
||||
disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops,
|
||||
1, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
disk->queue = NULL;
|
||||
put_disk(disk);
|
||||
disk = blk_mq_alloc_disk(&cd->tag_set, cd);
|
||||
if (IS_ERR(disk)) {
|
||||
blk_mq_free_tag_set(&cd->tag_set);
|
||||
continue;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&cd->rq_list);
|
||||
disk->queue->queuedata = cd;
|
||||
blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
|
||||
cd->disk = disk;
|
||||
cd->pi = &cd->pia;
|
||||
@ -343,6 +341,7 @@ static void pcd_init_units(void)
|
||||
cd->info.mask = 0;
|
||||
disk->major = major;
|
||||
disk->first_minor = unit;
|
||||
disk->minors = 1;
|
||||
strcpy(disk->disk_name, cd->name); /* umm... */
|
||||
disk->fops = &pcd_bdops;
|
||||
disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
|
||||
@ -759,10 +758,8 @@ static int pcd_detect(void)
|
||||
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
|
||||
if (!cd->disk)
|
||||
continue;
|
||||
blk_cleanup_queue(cd->disk->queue);
|
||||
cd->disk->queue = NULL;
|
||||
blk_cleanup_disk(cd->disk);
|
||||
blk_mq_free_tag_set(&cd->tag_set);
|
||||
put_disk(cd->disk);
|
||||
}
|
||||
pi_unregister_driver(par_drv);
|
||||
return -1;
|
||||
|
@ -879,18 +879,6 @@ static void pd_probe_drive(struct pd_unit *disk)
|
||||
{
|
||||
struct gendisk *p;
|
||||
|
||||
p = alloc_disk(1 << PD_BITS);
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
strcpy(p->disk_name, disk->name);
|
||||
p->fops = &pd_fops;
|
||||
p->major = major;
|
||||
p->first_minor = (disk - pd) << PD_BITS;
|
||||
p->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
disk->gd = p;
|
||||
p->private_data = disk;
|
||||
|
||||
memset(&disk->tag_set, 0, sizeof(disk->tag_set));
|
||||
disk->tag_set.ops = &pd_mq_ops;
|
||||
disk->tag_set.cmd_size = sizeof(struct pd_req);
|
||||
@ -903,14 +891,21 @@ static void pd_probe_drive(struct pd_unit *disk)
|
||||
if (blk_mq_alloc_tag_set(&disk->tag_set))
|
||||
return;
|
||||
|
||||
p->queue = blk_mq_init_queue(&disk->tag_set);
|
||||
if (IS_ERR(p->queue)) {
|
||||
p = blk_mq_alloc_disk(&disk->tag_set, disk);
|
||||
if (!p) {
|
||||
blk_mq_free_tag_set(&disk->tag_set);
|
||||
p->queue = NULL;
|
||||
return;
|
||||
}
|
||||
disk->gd = p;
|
||||
|
||||
strcpy(p->disk_name, disk->name);
|
||||
p->fops = &pd_fops;
|
||||
p->major = major;
|
||||
p->first_minor = (disk - pd) << PD_BITS;
|
||||
p->minors = 1 << PD_BITS;
|
||||
p->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
p->private_data = disk;
|
||||
|
||||
p->queue->queuedata = disk;
|
||||
blk_queue_max_hw_sectors(p->queue, cluster);
|
||||
blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
|
||||
|
||||
@ -1019,9 +1014,8 @@ static void __exit pd_exit(void)
|
||||
if (p) {
|
||||
disk->gd = NULL;
|
||||
del_gendisk(p);
|
||||
blk_cleanup_queue(p->queue);
|
||||
blk_mq_free_tag_set(&disk->tag_set);
|
||||
put_disk(p);
|
||||
blk_cleanup_disk(p);
|
||||
pi_release(disk->pi);
|
||||
}
|
||||
}
|
||||
|
@ -294,20 +294,17 @@ static void __init pf_init_units(void)
|
||||
for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) {
|
||||
struct gendisk *disk;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (!disk)
|
||||
if (blk_mq_alloc_sq_tag_set(&pf->tag_set, &pf_mq_ops, 1,
|
||||
BLK_MQ_F_SHOULD_MERGE))
|
||||
continue;
|
||||
|
||||
disk->queue = blk_mq_init_sq_queue(&pf->tag_set, &pf_mq_ops,
|
||||
1, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
disk->queue = NULL;
|
||||
put_disk(disk);
|
||||
disk = blk_mq_alloc_disk(&pf->tag_set, pf);
|
||||
if (IS_ERR(disk)) {
|
||||
blk_mq_free_tag_set(&pf->tag_set);
|
||||
continue;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&pf->rq_list);
|
||||
disk->queue->queuedata = pf;
|
||||
blk_queue_max_segments(disk->queue, cluster);
|
||||
blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
|
||||
pf->disk = disk;
|
||||
@ -318,6 +315,7 @@ static void __init pf_init_units(void)
|
||||
snprintf(pf->name, PF_NAMELEN, "%s%d", name, unit);
|
||||
disk->major = major;
|
||||
disk->first_minor = unit;
|
||||
disk->minors = 1;
|
||||
strcpy(disk->disk_name, pf->name);
|
||||
disk->fops = &pf_fops;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
@ -766,10 +764,8 @@ static int pf_detect(void)
|
||||
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
|
||||
if (!pf->disk)
|
||||
continue;
|
||||
blk_cleanup_queue(pf->disk->queue);
|
||||
pf->disk->queue = NULL;
|
||||
blk_cleanup_disk(pf->disk);
|
||||
blk_mq_free_tag_set(&pf->tag_set);
|
||||
put_disk(pf->disk);
|
||||
}
|
||||
pi_unregister_driver(par_drv);
|
||||
return -1;
|
||||
|
@ -2711,19 +2711,17 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
||||
pd->write_congestion_off = write_congestion_off;
|
||||
|
||||
ret = -ENOMEM;
|
||||
disk = alloc_disk(1);
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
goto out_mem;
|
||||
pd->disk = disk;
|
||||
disk->major = pktdev_major;
|
||||
disk->first_minor = idx;
|
||||
disk->minors = 1;
|
||||
disk->fops = &pktcdvd_ops;
|
||||
disk->flags = GENHD_FL_REMOVABLE;
|
||||
strcpy(disk->disk_name, pd->name);
|
||||
disk->private_data = pd;
|
||||
disk->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!disk->queue)
|
||||
goto out_mem2;
|
||||
|
||||
pd->pkt_dev = MKDEV(pktdev_major, idx);
|
||||
ret = pkt_new_dev(pd, dev);
|
||||
@ -2746,7 +2744,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
||||
return 0;
|
||||
|
||||
out_mem2:
|
||||
put_disk(disk);
|
||||
blk_cleanup_disk(disk);
|
||||
out_mem:
|
||||
mempool_exit(&pd->rb_pool);
|
||||
kfree(pd);
|
||||
@ -2796,8 +2794,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
|
||||
pkt_dbg(1, pd, "writer unmapped\n");
|
||||
|
||||
del_gendisk(pd->disk);
|
||||
blk_cleanup_queue(pd->disk->queue);
|
||||
put_disk(pd->disk);
|
||||
blk_cleanup_disk(pd->disk);
|
||||
|
||||
mempool_exit(&pd->rb_pool);
|
||||
kfree(pd);
|
||||
|
@ -29,7 +29,6 @@
|
||||
|
||||
struct ps3disk_private {
|
||||
spinlock_t lock; /* Request queue spinlock */
|
||||
struct request_queue *queue;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct gendisk *gendisk;
|
||||
unsigned int blocking_factor;
|
||||
@ -267,7 +266,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
|
||||
blk_mq_end_request(req, error);
|
||||
spin_unlock(&priv->lock);
|
||||
|
||||
blk_mq_run_hw_queues(priv->queue, true);
|
||||
blk_mq_run_hw_queues(priv->gendisk->queue, true);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
@ -441,17 +440,20 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
|
||||
ps3disk_identify(dev);
|
||||
|
||||
queue = blk_mq_init_sq_queue(&priv->tag_set, &ps3disk_mq_ops, 1,
|
||||
error = blk_mq_alloc_sq_tag_set(&priv->tag_set, &ps3disk_mq_ops, 1,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(queue)) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: blk_mq_init_queue failed\n",
|
||||
__func__, __LINE__);
|
||||
error = PTR_ERR(queue);
|
||||
if (error)
|
||||
goto fail_teardown;
|
||||
|
||||
gendisk = blk_mq_alloc_disk(&priv->tag_set, dev);
|
||||
if (IS_ERR(gendisk)) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n",
|
||||
__func__, __LINE__);
|
||||
error = PTR_ERR(gendisk);
|
||||
goto fail_free_tag_set;
|
||||
}
|
||||
|
||||
priv->queue = queue;
|
||||
queue->queuedata = dev;
|
||||
queue = gendisk->queue;
|
||||
|
||||
blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
|
||||
blk_queue_dma_alignment(queue, dev->blk_size-1);
|
||||
@ -462,19 +464,11 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
blk_queue_max_segments(queue, -1);
|
||||
blk_queue_max_segment_size(queue, dev->bounce_size);
|
||||
|
||||
gendisk = alloc_disk(PS3DISK_MINORS);
|
||||
if (!gendisk) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__,
|
||||
__LINE__);
|
||||
error = -ENOMEM;
|
||||
goto fail_cleanup_queue;
|
||||
}
|
||||
|
||||
priv->gendisk = gendisk;
|
||||
gendisk->major = ps3disk_major;
|
||||
gendisk->first_minor = devidx * PS3DISK_MINORS;
|
||||
gendisk->minors = PS3DISK_MINORS;
|
||||
gendisk->fops = &ps3disk_fops;
|
||||
gendisk->queue = queue;
|
||||
gendisk->private_data = dev;
|
||||
snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME,
|
||||
devidx+'a');
|
||||
@ -490,8 +484,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
device_add_disk(&dev->sbd.core, gendisk, NULL);
|
||||
return 0;
|
||||
|
||||
fail_cleanup_queue:
|
||||
blk_cleanup_queue(queue);
|
||||
fail_free_tag_set:
|
||||
blk_mq_free_tag_set(&priv->tag_set);
|
||||
fail_teardown:
|
||||
ps3stor_teardown(dev);
|
||||
@ -517,9 +510,8 @@ static void ps3disk_remove(struct ps3_system_bus_device *_dev)
|
||||
&ps3disk_mask);
|
||||
mutex_unlock(&ps3disk_mask_mutex);
|
||||
del_gendisk(priv->gendisk);
|
||||
blk_cleanup_queue(priv->queue);
|
||||
blk_cleanup_disk(priv->gendisk);
|
||||
blk_mq_free_tag_set(&priv->tag_set);
|
||||
put_disk(priv->gendisk);
|
||||
dev_notice(&dev->sbd.core, "Synchronizing disk cache\n");
|
||||
ps3disk_sync_cache(dev);
|
||||
ps3stor_teardown(dev);
|
||||
|
@ -67,7 +67,6 @@ struct ps3vram_cache {
|
||||
};
|
||||
|
||||
struct ps3vram_priv {
|
||||
struct request_queue *queue;
|
||||
struct gendisk *gendisk;
|
||||
|
||||
u64 size;
|
||||
@ -613,7 +612,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
{
|
||||
struct ps3vram_priv *priv;
|
||||
int error, status;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *gendisk;
|
||||
u64 ddr_size, ddr_lpar, ctrl_lpar, info_lpar, reports_lpar,
|
||||
reports_size, xdr_lpar;
|
||||
@ -736,33 +734,23 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
|
||||
ps3vram_proc_init(dev);
|
||||
|
||||
queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!queue) {
|
||||
dev_err(&dev->core, "blk_alloc_queue failed\n");
|
||||
gendisk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!gendisk) {
|
||||
dev_err(&dev->core, "blk_alloc_disk failed\n");
|
||||
error = -ENOMEM;
|
||||
goto out_cache_cleanup;
|
||||
}
|
||||
|
||||
priv->queue = queue;
|
||||
blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
|
||||
blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
|
||||
blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
|
||||
|
||||
gendisk = alloc_disk(1);
|
||||
if (!gendisk) {
|
||||
dev_err(&dev->core, "alloc_disk failed\n");
|
||||
error = -ENOMEM;
|
||||
goto fail_cleanup_queue;
|
||||
}
|
||||
|
||||
priv->gendisk = gendisk;
|
||||
gendisk->major = ps3vram_major;
|
||||
gendisk->first_minor = 0;
|
||||
gendisk->minors = 1;
|
||||
gendisk->fops = &ps3vram_fops;
|
||||
gendisk->queue = queue;
|
||||
gendisk->private_data = dev;
|
||||
strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
|
||||
set_capacity(gendisk, priv->size >> 9);
|
||||
blk_queue_max_segments(gendisk->queue, BLK_MAX_SEGMENTS);
|
||||
blk_queue_max_segment_size(gendisk->queue, BLK_MAX_SEGMENT_SIZE);
|
||||
blk_queue_max_hw_sectors(gendisk->queue, BLK_SAFE_MAX_SECTORS);
|
||||
|
||||
dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n",
|
||||
gendisk->disk_name, get_capacity(gendisk) >> 11);
|
||||
@ -770,8 +758,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
||||
device_add_disk(&dev->core, gendisk, NULL);
|
||||
return 0;
|
||||
|
||||
fail_cleanup_queue:
|
||||
blk_cleanup_queue(queue);
|
||||
out_cache_cleanup:
|
||||
remove_proc_entry(DEVICE_NAME, NULL);
|
||||
ps3vram_cache_cleanup(dev);
|
||||
@ -802,8 +788,7 @@ static void ps3vram_remove(struct ps3_system_bus_device *dev)
|
||||
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
|
||||
|
||||
del_gendisk(priv->gendisk);
|
||||
put_disk(priv->gendisk);
|
||||
blk_cleanup_queue(priv->queue);
|
||||
blk_cleanup_disk(priv->gendisk);
|
||||
remove_proc_entry(DEVICE_NAME, NULL);
|
||||
ps3vram_cache_cleanup(dev);
|
||||
iounmap(priv->reports);
|
||||
|
@ -4750,9 +4750,8 @@ static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
|
||||
static void rbd_free_disk(struct rbd_device *rbd_dev)
|
||||
{
|
||||
blk_cleanup_queue(rbd_dev->disk->queue);
|
||||
blk_cleanup_disk(rbd_dev->disk);
|
||||
blk_mq_free_tag_set(&rbd_dev->tag_set);
|
||||
put_disk(rbd_dev->disk);
|
||||
rbd_dev->disk = NULL;
|
||||
}
|
||||
|
||||
@ -4922,22 +4921,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
|
||||
int err;
|
||||
|
||||
/* create gendisk info */
|
||||
disk = alloc_disk(single_major ?
|
||||
(1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
|
||||
RBD_MINORS_PER_MAJOR);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
|
||||
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
|
||||
rbd_dev->dev_id);
|
||||
disk->major = rbd_dev->major;
|
||||
disk->first_minor = rbd_dev->minor;
|
||||
if (single_major)
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
disk->fops = &rbd_bd_ops;
|
||||
disk->private_data = rbd_dev;
|
||||
|
||||
memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
|
||||
rbd_dev->tag_set.ops = &rbd_mq_ops;
|
||||
rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
|
||||
@ -4948,13 +4931,26 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
|
||||
err = blk_mq_alloc_tag_set(&rbd_dev->tag_set);
|
||||
if (err)
|
||||
goto out_disk;
|
||||
return err;
|
||||
|
||||
q = blk_mq_init_queue(&rbd_dev->tag_set);
|
||||
if (IS_ERR(q)) {
|
||||
err = PTR_ERR(q);
|
||||
disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_tag_set;
|
||||
}
|
||||
q = disk->queue;
|
||||
|
||||
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
|
||||
rbd_dev->dev_id);
|
||||
disk->major = rbd_dev->major;
|
||||
disk->first_minor = rbd_dev->minor;
|
||||
if (single_major) {
|
||||
disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT);
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
} else {
|
||||
disk->minors = RBD_MINORS_PER_MAJOR;
|
||||
}
|
||||
disk->fops = &rbd_bd_ops;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
|
||||
@ -4976,21 +4972,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
|
||||
|
||||
/*
|
||||
* disk_release() expects a queue ref from add_disk() and will
|
||||
* put it. Hold an extra ref until add_disk() is called.
|
||||
*/
|
||||
WARN_ON(!blk_get_queue(q));
|
||||
disk->queue = q;
|
||||
q->queuedata = rbd_dev;
|
||||
|
||||
rbd_dev->disk = disk;
|
||||
|
||||
return 0;
|
||||
out_tag_set:
|
||||
blk_mq_free_tag_set(&rbd_dev->tag_set);
|
||||
out_disk:
|
||||
put_disk(disk);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -7088,8 +7074,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
|
||||
goto err_out_image_lock;
|
||||
|
||||
device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL);
|
||||
/* see rbd_init_disk() */
|
||||
blk_put_queue(rbd_dev->disk->queue);
|
||||
|
||||
spin_lock(&rbd_dev_list_lock);
|
||||
list_add_tail(&rbd_dev->node, &rbd_dev_list);
|
||||
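The rbd hunks above follow the shape used by most request-based conversions in this pull: the tag set is allocated as before, then blk_mq_alloc_disk() returns the gendisk with its request queue already attached, replacing the old alloc_disk() plus blk_mq_init_queue() pair and the extra queue reference dance. A minimal sketch of that shape, under the 5.14-era blk-mq API; my_dev, my_ops and my_fops are placeholders, not rbd symbols::

  #include <linux/blk-mq.h>
  #include <linux/blkdev.h>
  #include <linux/err.h>

  static const struct blk_mq_ops my_ops;                  /* .queue_rq omitted in this sketch */
  static const struct block_device_operations my_fops;    /* placeholder fops */

  struct my_dev {                                         /* hypothetical driver state */
      struct blk_mq_tag_set tag_set;
      struct gendisk *disk;
      int major;
  };

  static int my_alloc_disk(struct my_dev *dev)
  {
      struct gendisk *disk;
      int err;

      /* Tag set setup is unchanged by this series. */
      memset(&dev->tag_set, 0, sizeof(dev->tag_set));
      dev->tag_set.ops = &my_ops;
      dev->tag_set.queue_depth = 128;
      dev->tag_set.numa_node = NUMA_NO_NODE;
      dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
      err = blk_mq_alloc_tag_set(&dev->tag_set);
      if (err)
          return err;

      /* One call now returns the disk with its queue attached;
       * the second argument ends up in disk->queue->queuedata. */
      disk = blk_mq_alloc_disk(&dev->tag_set, dev);
      if (IS_ERR(disk)) {
          blk_mq_free_tag_set(&dev->tag_set);
          return PTR_ERR(disk);
      }

      disk->major = dev->major;
      disk->first_minor = 0;
      disk->minors = 1;
      disk->fops = &my_fops;
      disk->private_data = dev;
      dev->disk = disk;
      return 0;
  }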
|
@ -1353,18 +1353,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
|
||||
}
|
||||
}
|
||||
|
||||
static int setup_mq_dev(struct rnbd_clt_dev *dev)
|
||||
{
|
||||
dev->queue = blk_mq_init_queue(&dev->sess->tag_set);
|
||||
if (IS_ERR(dev->queue)) {
|
||||
rnbd_clt_err(dev, "Initializing multiqueue queue failed, err: %ld\n",
|
||||
PTR_ERR(dev->queue));
|
||||
return PTR_ERR(dev->queue);
|
||||
}
|
||||
rnbd_init_mq_hw_queues(dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void setup_request_queue(struct rnbd_clt_dev *dev)
|
||||
{
|
||||
blk_queue_logical_block_size(dev->queue, dev->logical_block_size);
|
||||
@ -1393,13 +1381,13 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
|
||||
blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
|
||||
blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
|
||||
blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
|
||||
dev->queue->queuedata = dev;
|
||||
}
|
||||
|
||||
static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
|
||||
{
|
||||
dev->gd->major = rnbd_client_major;
|
||||
dev->gd->first_minor = idx << RNBD_PART_BITS;
|
||||
dev->gd->minors = 1 << RNBD_PART_BITS;
|
||||
dev->gd->fops = &rnbd_client_ops;
|
||||
dev->gd->queue = dev->queue;
|
||||
dev->gd->private_data = dev;
|
||||
@ -1426,24 +1414,18 @@ static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
|
||||
|
||||
static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
|
||||
{
|
||||
int err, idx = dev->clt_device_id;
|
||||
int idx = dev->clt_device_id;
|
||||
|
||||
dev->size = dev->nsectors * dev->logical_block_size;
|
||||
|
||||
err = setup_mq_dev(dev);
|
||||
if (err)
|
||||
return err;
|
||||
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
|
||||
if (IS_ERR(dev->gd))
|
||||
return PTR_ERR(dev->gd);
|
||||
dev->queue = dev->gd->queue;
|
||||
rnbd_init_mq_hw_queues(dev);
|
||||
|
||||
setup_request_queue(dev);
|
||||
|
||||
dev->gd = alloc_disk_node(1 << RNBD_PART_BITS, NUMA_NO_NODE);
|
||||
if (!dev->gd) {
|
||||
blk_cleanup_queue(dev->queue);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rnbd_clt_setup_gen_disk(dev, idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1650,8 +1632,7 @@ put_sess:
|
||||
static void destroy_gen_disk(struct rnbd_clt_dev *dev)
|
||||
{
|
||||
del_gendisk(dev->gd);
|
||||
blk_cleanup_queue(dev->queue);
|
||||
put_disk(dev->gd);
|
||||
blk_cleanup_disk(dev->gd);
|
||||
}
|
||||
|
||||
static void destroy_sysfs(struct rnbd_clt_dev *dev,
|
||||
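The rnbd destroy_gen_disk() hunk above shows the matching teardown change: the blk_cleanup_queue() plus put_disk() pair collapses into blk_cleanup_disk(), with the tag set freed afterwards. A short sketch of that ordering, assuming the 5.14-era API; the helper name is illustrative::

  #include <linux/blk-mq.h>
  #include <linux/blkdev.h>

  static void my_destroy_disk(struct gendisk *disk, struct blk_mq_tag_set *set)
  {
      del_gendisk(disk);        /* unlink from userspace first */
      blk_cleanup_disk(disk);   /* tears down the queue and drops the disk reference */
      blk_mq_free_tag_set(set); /* the tag set must outlive the queue */
  }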
|
@ -236,47 +236,40 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
card->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!card->queue) {
|
||||
dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
|
||||
unregister_blkdev(card->major, DRIVER_NAME);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
card->gendisk = alloc_disk(blkdev_minors);
|
||||
card->gendisk = blk_alloc_disk(blkdev_minors);
|
||||
if (!card->gendisk) {
|
||||
dev_err(CARD_TO_DEV(card), "Failed disk alloc\n");
|
||||
blk_cleanup_queue(card->queue);
|
||||
unregister_blkdev(card->major, DRIVER_NAME);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (card->config_valid) {
|
||||
blk_size = card->config.data.block_size;
|
||||
blk_queue_dma_alignment(card->queue, blk_size - 1);
|
||||
blk_queue_logical_block_size(card->queue, blk_size);
|
||||
blk_queue_dma_alignment(card->gendisk->queue, blk_size - 1);
|
||||
blk_queue_logical_block_size(card->gendisk->queue, blk_size);
|
||||
}
|
||||
|
||||
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
|
||||
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
|
||||
blk_queue_max_hw_sectors(card->gendisk->queue, blkdev_max_hw_sectors);
|
||||
blk_queue_physical_block_size(card->gendisk->queue, RSXX_HW_BLK_SIZE);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, card->gendisk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->gendisk->queue);
|
||||
if (rsxx_discard_supported(card)) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue);
|
||||
blk_queue_max_discard_sectors(card->queue,
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->gendisk->queue);
|
||||
blk_queue_max_discard_sectors(card->gendisk->queue,
|
||||
RSXX_HW_BLK_SIZE >> 9);
|
||||
card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
|
||||
card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE;
|
||||
card->gendisk->queue->limits.discard_granularity =
|
||||
RSXX_HW_BLK_SIZE;
|
||||
card->gendisk->queue->limits.discard_alignment =
|
||||
RSXX_HW_BLK_SIZE;
|
||||
}
|
||||
|
||||
snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
|
||||
"rsxx%d", card->disk_id);
|
||||
card->gendisk->major = card->major;
|
||||
card->gendisk->first_minor = 0;
|
||||
card->gendisk->minors = blkdev_minors;
|
||||
card->gendisk->fops = &rsxx_fops;
|
||||
card->gendisk->private_data = card;
|
||||
card->gendisk->queue = card->queue;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -286,10 +279,8 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
|
||||
if (!enable_blkdev)
|
||||
return;
|
||||
|
||||
put_disk(card->gendisk);
|
||||
blk_cleanup_disk(card->gendisk);
|
||||
card->gendisk = NULL;
|
||||
|
||||
blk_cleanup_queue(card->queue);
|
||||
unregister_blkdev(card->major, DRIVER_NAME);
|
||||
}
|
||||
|
||||
|
@ -154,7 +154,6 @@ struct rsxx_cardinfo {
|
||||
bool bdev_attached;
|
||||
int disk_id;
|
||||
int major;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *gendisk;
|
||||
struct {
|
||||
/* Used to convert a byte address to a device address. */
|
||||
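Bio-based drivers such as rsxx take the other branch of this conversion: the separate blk_alloc_queue() call goes away and blk_alloc_disk() hands back a gendisk whose queue is already embedded, so all limits are now applied through disk->queue. A minimal sketch of that pattern; MY_MAJOR and my_bio_fops are placeholders for driver-specific values::

  #include <linux/blkdev.h>
  #include <linux/kernel.h>

  #define MY_MAJOR 240                                    /* placeholder experimental major */
  static const struct block_device_operations my_bio_fops; /* assumed to provide .submit_bio */

  static struct gendisk *my_bio_disk_alloc(int minor)
  {
      struct gendisk *disk;

      disk = blk_alloc_disk(NUMA_NO_NODE);   /* returns NULL on failure, not ERR_PTR */
      if (!disk)
          return NULL;

      disk->major = MY_MAJOR;
      disk->first_minor = minor;
      disk->minors = 1;
      disk->fops = &my_bio_fops;
      snprintf(disk->disk_name, sizeof(disk->disk_name), "mydev%d", minor);

      /* Queue limits now go through the embedded queue. */
      blk_queue_logical_block_size(disk->queue, 512);
      blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);

      return disk;
  }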
|
@ -780,27 +780,6 @@ static const struct blk_mq_ops vdc_mq_ops = {
|
||||
.queue_rq = vdc_queue_rq,
|
||||
};
|
||||
|
||||
static void cleanup_queue(struct request_queue *q)
|
||||
{
|
||||
struct vdc_port *port = q->queuedata;
|
||||
|
||||
blk_cleanup_queue(q);
|
||||
blk_mq_free_tag_set(&port->tag_set);
|
||||
}
|
||||
|
||||
static struct request_queue *init_queue(struct vdc_port *port)
|
||||
{
|
||||
struct request_queue *q;
|
||||
|
||||
q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(q))
|
||||
return q;
|
||||
|
||||
q->queuedata = port;
|
||||
return q;
|
||||
}
|
||||
|
||||
static int probe_disk(struct vdc_port *port)
|
||||
{
|
||||
struct request_queue *q;
|
||||
@ -838,21 +817,21 @@ static int probe_disk(struct vdc_port *port)
|
||||
(u64)geom.num_sec);
|
||||
}
|
||||
|
||||
q = init_queue(port);
|
||||
if (IS_ERR(q)) {
|
||||
printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
|
||||
port->vio.name);
|
||||
return PTR_ERR(q);
|
||||
}
|
||||
g = alloc_disk(1 << PARTITION_SHIFT);
|
||||
if (!g) {
|
||||
err = blk_mq_alloc_sq_tag_set(&port->tag_set, &vdc_mq_ops,
|
||||
VDC_TX_RING_SIZE, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
g = blk_mq_alloc_disk(&port->tag_set, port);
|
||||
if (IS_ERR(g)) {
|
||||
printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
|
||||
port->vio.name);
|
||||
cleanup_queue(q);
|
||||
return -ENOMEM;
|
||||
blk_mq_free_tag_set(&port->tag_set);
|
||||
return PTR_ERR(g);
|
||||
}
|
||||
|
||||
port->disk = g;
|
||||
q = g->queue;
|
||||
|
||||
/* Each segment in a request is up to an aligned page in size. */
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
|
||||
@ -862,6 +841,7 @@ static int probe_disk(struct vdc_port *port)
|
||||
blk_queue_max_hw_sectors(q, port->max_xfer_size);
|
||||
g->major = vdc_major;
|
||||
g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
|
||||
g->minors = 1 << PARTITION_SHIFT;
|
||||
strcpy(g->disk_name, port->disk_name);
|
||||
|
||||
g->fops = &vdc_fops;
|
||||
@ -1083,9 +1063,8 @@ static int vdc_port_remove(struct vio_dev *vdev)
|
||||
del_timer_sync(&port->vio.timer);
|
||||
|
||||
del_gendisk(port->disk);
|
||||
cleanup_queue(port->disk->queue);
|
||||
put_disk(port->disk);
|
||||
port->disk = NULL;
|
||||
blk_cleanup_disk(port->disk);
|
||||
blk_mq_free_tag_set(&port->tag_set);
|
||||
|
||||
vdc_free_tx_ring(port);
|
||||
vio_ldc_free(&port->vio);
|
||||
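The sunvdc hunk above, like the swim, swim3, gdrom and memstick ones that follow, replaces blk_mq_init_sq_queue() with blk_mq_alloc_sq_tag_set() followed by blk_mq_alloc_disk(), so the error path only has the tag set to unwind. A compact sketch under those assumptions; my_port and my_mq_ops stand in for the driver's own names::

  #include <linux/blk-mq.h>
  #include <linux/err.h>

  static const struct blk_mq_ops my_mq_ops;  /* .queue_rq omitted in this sketch */

  struct my_port {                           /* hypothetical per-device state */
      struct blk_mq_tag_set tag_set;
      struct gendisk *disk;
  };

  static int my_probe_disk(struct my_port *port)
  {
      struct gendisk *g;
      int err;

      err = blk_mq_alloc_sq_tag_set(&port->tag_set, &my_mq_ops,
                                    16, BLK_MQ_F_SHOULD_MERGE);
      if (err)
          return err;

      g = blk_mq_alloc_disk(&port->tag_set, port);
      if (IS_ERR(g)) {
          blk_mq_free_tag_set(&port->tag_set);
          return PTR_ERR(g);
      }
      port->disk = g;
      return 0;
  }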
|
@ -800,23 +800,20 @@ static int swim_floppy_init(struct swim_priv *swd)
|
||||
spin_lock_init(&swd->lock);
|
||||
|
||||
for (drive = 0; drive < swd->floppy_count; drive++) {
|
||||
struct request_queue *q;
|
||||
err = blk_mq_alloc_sq_tag_set(&swd->unit[drive].tag_set,
|
||||
&swim_mq_ops, 2, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (err)
|
||||
goto exit_put_disks;
|
||||
|
||||
swd->unit[drive].disk = alloc_disk(1);
|
||||
if (swd->unit[drive].disk == NULL) {
|
||||
err = -ENOMEM;
|
||||
swd->unit[drive].disk =
|
||||
blk_mq_alloc_disk(&swd->unit[drive].tag_set,
|
||||
&swd->unit[drive]);
|
||||
if (IS_ERR(swd->unit[drive].disk)) {
|
||||
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
|
||||
err = PTR_ERR(swd->unit[drive].disk);
|
||||
goto exit_put_disks;
|
||||
}
|
||||
|
||||
q = blk_mq_init_sq_queue(&swd->unit[drive].tag_set, &swim_mq_ops,
|
||||
2, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(q)) {
|
||||
err = PTR_ERR(q);
|
||||
goto exit_put_disks;
|
||||
}
|
||||
|
||||
swd->unit[drive].disk->queue = q;
|
||||
swd->unit[drive].disk->queue->queuedata = &swd->unit[drive];
|
||||
swd->unit[drive].swd = swd;
|
||||
}
|
||||
|
||||
@ -824,6 +821,7 @@ static int swim_floppy_init(struct swim_priv *swd)
|
||||
swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE;
|
||||
swd->unit[drive].disk->major = FLOPPY_MAJOR;
|
||||
swd->unit[drive].disk->first_minor = drive;
|
||||
swd->unit[drive].disk->minors = 1;
|
||||
sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
|
||||
swd->unit[drive].disk->fops = &floppy_fops;
|
||||
swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
@ -839,14 +837,10 @@ exit_put_disks:
|
||||
do {
|
||||
struct gendisk *disk = swd->unit[drive].disk;
|
||||
|
||||
if (disk) {
|
||||
if (disk->queue) {
|
||||
blk_cleanup_queue(disk->queue);
|
||||
disk->queue = NULL;
|
||||
}
|
||||
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
|
||||
put_disk(disk);
|
||||
}
|
||||
if (!disk)
|
||||
continue;
|
||||
blk_cleanup_disk(disk);
|
||||
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
|
||||
} while (drive--);
|
||||
return err;
|
||||
}
|
||||
|
@ -1202,30 +1202,27 @@ static int swim3_attach(struct macio_dev *mdev,
|
||||
return rc;
|
||||
}
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (disk == NULL) {
|
||||
rc = -ENOMEM;
|
||||
goto out_unregister;
|
||||
}
|
||||
|
||||
fs = &floppy_states[floppy_count];
|
||||
memset(fs, 0, sizeof(*fs));
|
||||
|
||||
disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
rc = PTR_ERR(disk->queue);
|
||||
disk->queue = NULL;
|
||||
goto out_put_disk;
|
||||
rc = blk_mq_alloc_sq_tag_set(&fs->tag_set, &swim3_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (rc)
|
||||
goto out_unregister;
|
||||
|
||||
disk = blk_mq_alloc_disk(&fs->tag_set, fs);
|
||||
if (IS_ERR(disk)) {
|
||||
rc = PTR_ERR(disk);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
disk->queue->queuedata = fs;
|
||||
|
||||
rc = swim3_add_device(mdev, floppy_count);
|
||||
if (rc)
|
||||
goto out_cleanup_queue;
|
||||
goto out_cleanup_disk;
|
||||
|
||||
disk->major = FLOPPY_MAJOR;
|
||||
disk->first_minor = floppy_count;
|
||||
disk->minors = 1;
|
||||
disk->fops = &floppy_fops;
|
||||
disk->private_data = fs;
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
@ -1237,12 +1234,10 @@ static int swim3_attach(struct macio_dev *mdev,
|
||||
disks[floppy_count++] = disk;
|
||||
return 0;
|
||||
|
||||
out_cleanup_queue:
|
||||
blk_cleanup_queue(disk->queue);
|
||||
disk->queue = NULL;
|
||||
out_cleanup_disk:
|
||||
blk_cleanup_disk(disk);
|
||||
out_free_tag_set:
|
||||
blk_mq_free_tag_set(&fs->tag_set);
|
||||
out_put_disk:
|
||||
put_disk(disk);
|
||||
out_unregister:
|
||||
if (floppy_count == 0)
|
||||
unregister_blkdev(FLOPPY_MAJOR, "fd");
|
||||
|
@ -1343,32 +1343,25 @@ static int carm_init_disk(struct carm_host *host, unsigned int port_no)
|
||||
{
|
||||
struct carm_port *port = &host->port[port_no];
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
|
||||
port->host = host;
|
||||
port->port_no = port_no;
|
||||
|
||||
disk = alloc_disk(CARM_MINORS_PER_MAJOR);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
disk = blk_mq_alloc_disk(&host->tag_set, port);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
port->disk = disk;
|
||||
sprintf(disk->disk_name, DRV_NAME "/%u",
|
||||
(unsigned int)host->id * CARM_MAX_PORTS + port_no);
|
||||
disk->major = host->major;
|
||||
disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
|
||||
disk->minors = CARM_MINORS_PER_MAJOR;
|
||||
disk->fops = &carm_bd_ops;
|
||||
disk->private_data = port;
|
||||
|
||||
q = blk_mq_init_queue(&host->tag_set);
|
||||
if (IS_ERR(q))
|
||||
return PTR_ERR(q);
|
||||
|
||||
blk_queue_max_segments(q, CARM_MAX_REQ_SG);
|
||||
blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
|
||||
|
||||
q->queuedata = port;
|
||||
disk->queue = q;
|
||||
blk_queue_max_segments(disk->queue, CARM_MAX_REQ_SG);
|
||||
blk_queue_segment_boundary(disk->queue, CARM_SG_BOUNDARY);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1382,9 +1375,7 @@ static void carm_free_disk(struct carm_host *host, unsigned int port_no)
|
||||
|
||||
if (disk->flags & GENHD_FL_UP)
|
||||
del_gendisk(disk);
|
||||
if (disk->queue)
|
||||
blk_cleanup_queue(disk->queue);
|
||||
put_disk(disk);
|
||||
blk_cleanup_disk(disk);
|
||||
}
|
||||
|
||||
static int carm_init_shm(struct carm_host *host)
|
||||
|
@ -749,13 +749,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
if (err)
|
||||
goto out_free_vblk;
|
||||
|
||||
/* FIXME: How many partitions? How long is a piece of string? */
|
||||
vblk->disk = alloc_disk(1 << PART_BITS);
|
||||
if (!vblk->disk) {
|
||||
err = -ENOMEM;
|
||||
goto out_free_vq;
|
||||
}
|
||||
|
||||
/* Default queue sizing is to fill the ring. */
|
||||
if (likely(!virtblk_queue_depth)) {
|
||||
queue_depth = vblk->vqs[0].vq->num_free;
|
||||
@ -779,21 +772,20 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
|
||||
err = blk_mq_alloc_tag_set(&vblk->tag_set);
|
||||
if (err)
|
||||
goto out_put_disk;
|
||||
goto out_free_vq;
|
||||
|
||||
q = blk_mq_init_queue(&vblk->tag_set);
|
||||
if (IS_ERR(q)) {
|
||||
err = -ENOMEM;
|
||||
vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
|
||||
if (IS_ERR(vblk->disk)) {
|
||||
err = PTR_ERR(vblk->disk);
|
||||
goto out_free_tags;
|
||||
}
|
||||
vblk->disk->queue = q;
|
||||
|
||||
q->queuedata = vblk;
|
||||
q = vblk->disk->queue;
|
||||
|
||||
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
|
||||
|
||||
vblk->disk->major = major;
|
||||
vblk->disk->first_minor = index_to_minor(index);
|
||||
vblk->disk->minors = 1 << PART_BITS;
|
||||
vblk->disk->private_data = vblk;
|
||||
vblk->disk->fops = &virtblk_fops;
|
||||
vblk->disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
@ -892,8 +884,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
|
||||
out_free_tags:
|
||||
blk_mq_free_tag_set(&vblk->tag_set);
|
||||
out_put_disk:
|
||||
put_disk(vblk->disk);
|
||||
out_free_vq:
|
||||
vdev->config->del_vqs(vdev);
|
||||
kfree(vblk->vqs);
|
||||
@ -913,8 +903,7 @@ static void virtblk_remove(struct virtio_device *vdev)
|
||||
flush_work(&vblk->config_work);
|
||||
|
||||
del_gendisk(vblk->disk);
|
||||
blk_cleanup_queue(vblk->disk->queue);
|
||||
|
||||
blk_cleanup_disk(vblk->disk);
|
||||
blk_mq_free_tag_set(&vblk->tag_set);
|
||||
|
||||
mutex_lock(&vblk->vdev_mutex);
|
||||
@ -925,7 +914,6 @@ static void virtblk_remove(struct virtio_device *vdev)
|
||||
/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
|
||||
vblk->vdev = NULL;
|
||||
|
||||
put_disk(vblk->disk);
|
||||
vdev->config->del_vqs(vdev);
|
||||
kfree(vblk->vqs);
|
||||
|
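Because blk_mq_alloc_disk() no longer takes a minor count, the virtio-blk hunks above (and the xen-blkfront ones below) set disk->minors explicitly where alloc_disk(1 << PART_BITS) used to encode it. A tiny sketch of that bookkeeping; MY_PART_SHIFT is a placeholder for the driver's partition-bit count::

  #include <linux/genhd.h>

  #define MY_PART_SHIFT 4   /* placeholder partition shift */

  static void my_init_disk_minors(struct gendisk *disk, int index, int major)
  {
      disk->major = major;
      disk->first_minor = index << MY_PART_SHIFT;
      disk->minors = 1 << MY_PART_SHIFT;
      disk->flags |= GENHD_FL_EXT_DEVT;  /* extended devt, as virtio-blk does */
  }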
||||
|
@ -968,48 +968,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
|
||||
blk_queue_dma_alignment(rq, 511);
|
||||
}
|
||||
|
||||
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
|
||||
unsigned int physical_sector_size)
|
||||
{
|
||||
struct request_queue *rq;
|
||||
struct blkfront_info *info = gd->private_data;
|
||||
|
||||
memset(&info->tag_set, 0, sizeof(info->tag_set));
|
||||
info->tag_set.ops = &blkfront_mq_ops;
|
||||
info->tag_set.nr_hw_queues = info->nr_rings;
|
||||
if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
|
||||
/*
|
||||
* When indirect descriptior is not supported, the I/O request
|
||||
* will be split between multiple request in the ring.
|
||||
* To avoid problems when sending the request, divide by
|
||||
* 2 the depth of the queue.
|
||||
*/
|
||||
info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2;
|
||||
} else
|
||||
info->tag_set.queue_depth = BLK_RING_SIZE(info);
|
||||
info->tag_set.numa_node = NUMA_NO_NODE;
|
||||
info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
info->tag_set.cmd_size = sizeof(struct blkif_req);
|
||||
info->tag_set.driver_data = info;
|
||||
|
||||
if (blk_mq_alloc_tag_set(&info->tag_set))
|
||||
return -EINVAL;
|
||||
rq = blk_mq_init_queue(&info->tag_set);
|
||||
if (IS_ERR(rq)) {
|
||||
blk_mq_free_tag_set(&info->tag_set);
|
||||
return PTR_ERR(rq);
|
||||
}
|
||||
|
||||
rq->queuedata = info;
|
||||
info->rq = gd->queue = rq;
|
||||
info->gd = gd;
|
||||
info->sector_size = sector_size;
|
||||
info->physical_sector_size = physical_sector_size;
|
||||
blkif_set_queue_limits(info);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *flush_info(struct blkfront_info *info)
|
||||
{
|
||||
if (info->feature_flush && info->feature_fua)
|
||||
@ -1146,12 +1104,36 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
|
||||
err = xlbd_reserve_minors(minor, nr_minors);
|
||||
if (err)
|
||||
goto out;
|
||||
return err;
|
||||
err = -ENODEV;
|
||||
|
||||
gd = alloc_disk(nr_minors);
|
||||
if (gd == NULL)
|
||||
goto release;
|
||||
memset(&info->tag_set, 0, sizeof(info->tag_set));
|
||||
info->tag_set.ops = &blkfront_mq_ops;
|
||||
info->tag_set.nr_hw_queues = info->nr_rings;
|
||||
if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
|
||||
/*
|
||||
* When indirect descriptior is not supported, the I/O request
|
||||
* will be split between multiple request in the ring.
|
||||
* To avoid problems when sending the request, divide by
|
||||
* 2 the depth of the queue.
|
||||
*/
|
||||
info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2;
|
||||
} else
|
||||
info->tag_set.queue_depth = BLK_RING_SIZE(info);
|
||||
info->tag_set.numa_node = NUMA_NO_NODE;
|
||||
info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
info->tag_set.cmd_size = sizeof(struct blkif_req);
|
||||
info->tag_set.driver_data = info;
|
||||
|
||||
err = blk_mq_alloc_tag_set(&info->tag_set);
|
||||
if (err)
|
||||
goto out_release_minors;
|
||||
|
||||
gd = blk_mq_alloc_disk(&info->tag_set, info);
|
||||
if (IS_ERR(gd)) {
|
||||
err = PTR_ERR(gd);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
|
||||
strcpy(gd->disk_name, DEV_NAME);
|
||||
ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
|
||||
@ -1164,14 +1146,16 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
|
||||
gd->major = XENVBD_MAJOR;
|
||||
gd->first_minor = minor;
|
||||
gd->minors = nr_minors;
|
||||
gd->fops = &xlvbd_block_fops;
|
||||
gd->private_data = info;
|
||||
set_capacity(gd, capacity);
|
||||
|
||||
if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
|
||||
del_gendisk(gd);
|
||||
goto release;
|
||||
}
|
||||
info->rq = gd->queue;
|
||||
info->gd = gd;
|
||||
info->sector_size = sector_size;
|
||||
info->physical_sector_size = physical_sector_size;
|
||||
blkif_set_queue_limits(info);
|
||||
|
||||
xlvbd_flush(info);
|
||||
|
||||
@ -1186,9 +1170,10 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
|
||||
return 0;
|
||||
|
||||
release:
|
||||
out_free_tag_set:
|
||||
blk_mq_free_tag_set(&info->tag_set);
|
||||
out_release_minors:
|
||||
xlbd_release_minors(minor, nr_minors);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1217,12 +1202,9 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
|
||||
nr_minors = info->gd->minors;
|
||||
xlbd_release_minors(minor, nr_minors);
|
||||
|
||||
blk_cleanup_queue(info->rq);
|
||||
blk_mq_free_tag_set(&info->tag_set);
|
||||
info->rq = NULL;
|
||||
|
||||
put_disk(info->gd);
|
||||
blk_cleanup_disk(info->gd);
|
||||
info->gd = NULL;
|
||||
blk_mq_free_tag_set(&info->tag_set);
|
||||
}
|
||||
|
||||
/* Already hold rinfo->ring_lock. */
|
||||
@ -2163,7 +2145,7 @@ static void blkfront_closing(struct blkfront_info *info)
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
|
||||
if (bdev->bd_openers) {
|
||||
xenbus_dev_error(xbdev, -EBUSY,
|
||||
@ -2174,7 +2156,7 @@ static void blkfront_closing(struct blkfront_info *info)
|
||||
xenbus_frontend_closed(xbdev);
|
||||
}
|
||||
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
bdput(bdev);
|
||||
}
|
||||
|
||||
@ -2531,7 +2513,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
|
||||
* isn't closed yet, we let release take care of it.
|
||||
*/
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
mutex_lock(&disk->open_mutex);
|
||||
info = disk->private_data;
|
||||
|
||||
dev_warn(disk_to_dev(disk),
|
||||
@ -2546,7 +2528,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
|
||||
mutex_unlock(&blkfront_mutex);
|
||||
}
|
||||
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
bdput(bdev);
|
||||
|
||||
return 0;
|
||||
|
@@ -323,27 +323,20 @@ static const struct blk_mq_ops z2_mq_ops = {

static int z2ram_register_disk(int minor)
{
struct request_queue *q;
struct gendisk *disk;

disk = alloc_disk(1);
if (!disk)
return -ENOMEM;

q = blk_mq_init_queue(&tag_set);
if (IS_ERR(q)) {
put_disk(disk);
return PTR_ERR(q);
}
disk = blk_mq_alloc_disk(&tag_set, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);

disk->major = Z2RAM_MAJOR;
disk->first_minor = minor;
disk->minors = 1;
disk->fops = &z2_fops;
if (minor)
sprintf(disk->disk_name, "z2ram%d", minor);
else
sprintf(disk->disk_name, "z2ram");
disk->queue = q;

z2ram_gendisk[minor] = disk;
add_disk(disk);
|
||||
|
@@ -1781,24 +1781,24 @@ static ssize_t reset_store(struct device *dev,
zram = dev_to_zram(dev);
bdev = zram->disk->part0;

mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
/* Do not reset an active device or claimed device */
if (bdev->bd_openers || zram->claim) {
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return -EBUSY;
}

/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);

/* Make sure all the pending I/O are finished */
fsync_bdev(bdev);
zram_reset_device(zram);

mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
zram->claim = false;
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);

return len;
}
|
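The zram hunks above (and the xen-blkfront, md and dasd ones elsewhere in this pull) replace the per-bdev bd_mutex with the gendisk's open_mutex when checking openers or claiming a device. A small sketch of the new locking, assuming the 5.14-era fields; the "claim" flag mirrors zram's and is shown only to illustrate the lock change::

  #include <linux/blkdev.h>

  static int my_try_claim(struct block_device *bdev, bool *claim)
  {
      mutex_lock(&bdev->bd_disk->open_mutex);
      if (bdev->bd_openers || *claim) {
          mutex_unlock(&bdev->bd_disk->open_mutex);
          return -EBUSY;
      }
      *claim = true;
      mutex_unlock(&bdev->bd_disk->open_mutex);
      return 0;
  }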
||||
@ -1808,7 +1808,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
|
||||
int ret = 0;
|
||||
struct zram *zram;
|
||||
|
||||
WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
|
||||
WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
|
||||
|
||||
zram = bdev->bd_disk->private_data;
|
||||
/* zram was claimed to reset so open request fails */
|
||||
@ -1890,7 +1890,6 @@ static const struct attribute_group *zram_disk_attr_groups[] = {
|
||||
static int zram_add(void)
|
||||
{
|
||||
struct zram *zram;
|
||||
struct request_queue *queue;
|
||||
int ret, device_id;
|
||||
|
||||
zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
|
||||
@ -1906,27 +1905,20 @@ static int zram_add(void)
|
||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
spin_lock_init(&zram->wb_limit_lock);
|
||||
#endif
|
||||
queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!queue) {
|
||||
pr_err("Error allocating disk queue for device %d\n",
|
||||
|
||||
/* gendisk structure */
|
||||
zram->disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!zram->disk) {
|
||||
pr_err("Error allocating disk structure for device %d\n",
|
||||
device_id);
|
||||
ret = -ENOMEM;
|
||||
goto out_free_idr;
|
||||
}
|
||||
|
||||
/* gendisk structure */
|
||||
zram->disk = alloc_disk(1);
|
||||
if (!zram->disk) {
|
||||
pr_err("Error allocating disk structure for device %d\n",
|
||||
device_id);
|
||||
ret = -ENOMEM;
|
||||
goto out_free_queue;
|
||||
}
|
||||
|
||||
zram->disk->major = zram_major;
|
||||
zram->disk->first_minor = device_id;
|
||||
zram->disk->minors = 1;
|
||||
zram->disk->fops = &zram_devops;
|
||||
zram->disk->queue = queue;
|
||||
zram->disk->private_data = zram;
|
||||
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
|
||||
|
||||
@ -1969,8 +1961,6 @@ static int zram_add(void)
|
||||
pr_info("Added device: %s\n", zram->disk->disk_name);
|
||||
return device_id;
|
||||
|
||||
out_free_queue:
|
||||
blk_cleanup_queue(queue);
|
||||
out_free_idr:
|
||||
idr_remove(&zram_index_idr, device_id);
|
||||
out_free_dev:
|
||||
@ -1982,14 +1972,14 @@ static int zram_remove(struct zram *zram)
|
||||
{
|
||||
struct block_device *bdev = zram->disk->part0;
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
if (bdev->bd_openers || zram->claim) {
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
zram->claim = true;
|
||||
mutex_unlock(&bdev->bd_mutex);
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
|
||||
zram_debugfs_unregister(zram);
|
||||
|
||||
@ -2000,8 +1990,7 @@ static int zram_remove(struct zram *zram)
|
||||
pr_info("Removed device: %s\n", zram->disk->disk_name);
|
||||
|
||||
del_gendisk(zram->disk);
|
||||
blk_cleanup_queue(zram->disk->queue);
|
||||
put_disk(zram->disk);
|
||||
blk_cleanup_disk(zram->disk);
|
||||
kfree(zram);
|
||||
return 0;
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ struct zram {
|
||||
/*
|
||||
* zram is claimed so open request will be failed
|
||||
*/
|
||||
bool claim; /* Protected by bdev->bd_mutex */
|
||||
bool claim; /* Protected by disk->open_mutex */
|
||||
struct file *backing_dev;
|
||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||
spinlock_t wb_limit_lock;
|
||||
|
@ -772,53 +772,50 @@ static int probe_gdrom(struct platform_device *devptr)
|
||||
goto probe_fail_no_mem;
|
||||
}
|
||||
probe_gdrom_setupcd();
|
||||
gd.disk = alloc_disk(1);
|
||||
if (!gd.disk) {
|
||||
err = -ENODEV;
|
||||
goto probe_fail_no_disk;
|
||||
|
||||
err = blk_mq_alloc_sq_tag_set(&gd.tag_set, &gdrom_mq_ops, 1,
|
||||
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
|
||||
if (err)
|
||||
goto probe_fail_free_cd_info;
|
||||
|
||||
gd.disk = blk_mq_alloc_disk(&gd.tag_set, NULL);
|
||||
if (IS_ERR(gd.disk)) {
|
||||
err = PTR_ERR(gd.disk);
|
||||
goto probe_fail_free_tag_set;
|
||||
}
|
||||
gd.gdrom_rq = gd.disk->queue;
|
||||
probe_gdrom_setupdisk();
|
||||
if (register_cdrom(gd.disk, gd.cd_info)) {
|
||||
err = -ENODEV;
|
||||
goto probe_fail_cdrom_register;
|
||||
goto probe_fail_cleanup_disk;
|
||||
}
|
||||
gd.disk->fops = &gdrom_bdops;
|
||||
gd.disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
/* latch on to the interrupt */
|
||||
err = gdrom_set_interrupt_handlers();
|
||||
if (err)
|
||||
goto probe_fail_cmdirq_register;
|
||||
|
||||
gd.gdrom_rq = blk_mq_init_sq_queue(&gd.tag_set, &gdrom_mq_ops, 1,
|
||||
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
|
||||
if (IS_ERR(gd.gdrom_rq)) {
|
||||
err = PTR_ERR(gd.gdrom_rq);
|
||||
gd.gdrom_rq = NULL;
|
||||
goto probe_fail_requestq;
|
||||
}
|
||||
goto probe_fail_cleanup_disk;
|
||||
|
||||
err = probe_gdrom_setupqueue();
|
||||
if (err)
|
||||
goto probe_fail_toc;
|
||||
goto probe_fail_free_irqs;
|
||||
|
||||
gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL);
|
||||
if (!gd.toc) {
|
||||
err = -ENOMEM;
|
||||
goto probe_fail_toc;
|
||||
goto probe_fail_free_irqs;
|
||||
}
|
||||
add_disk(gd.disk);
|
||||
return 0;
|
||||
|
||||
probe_fail_toc:
|
||||
blk_cleanup_queue(gd.gdrom_rq);
|
||||
blk_mq_free_tag_set(&gd.tag_set);
|
||||
probe_fail_requestq:
|
||||
probe_fail_free_irqs:
|
||||
free_irq(HW_EVENT_GDROM_DMA, &gd);
|
||||
free_irq(HW_EVENT_GDROM_CMD, &gd);
|
||||
probe_fail_cmdirq_register:
|
||||
probe_fail_cdrom_register:
|
||||
del_gendisk(gd.disk);
|
||||
probe_fail_no_disk:
|
||||
probe_fail_cleanup_disk:
|
||||
blk_cleanup_disk(gd.disk);
|
||||
probe_fail_free_tag_set:
|
||||
blk_mq_free_tag_set(&gd.tag_set);
|
||||
probe_fail_free_cd_info:
|
||||
kfree(gd.cd_info);
|
||||
probe_fail_no_mem:
|
||||
unregister_blkdev(gdrom_major, GDROM_DEV_NAME);
|
||||
|
@ -305,7 +305,6 @@ static int __nvm_config_extended(struct nvm_dev *dev,
|
||||
static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
||||
{
|
||||
struct nvm_ioctl_create_extended e;
|
||||
struct request_queue *tqueue;
|
||||
struct gendisk *tdisk;
|
||||
struct nvm_tgt_type *tt;
|
||||
struct nvm_target *t;
|
||||
@ -370,24 +369,16 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
||||
goto err_t;
|
||||
}
|
||||
|
||||
tdisk = alloc_disk(0);
|
||||
tdisk = blk_alloc_disk(dev->q->node);
|
||||
if (!tdisk) {
|
||||
ret = -ENOMEM;
|
||||
goto err_dev;
|
||||
}
|
||||
|
||||
tqueue = blk_alloc_queue(dev->q->node);
|
||||
if (!tqueue) {
|
||||
ret = -ENOMEM;
|
||||
goto err_disk;
|
||||
}
|
||||
|
||||
strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
|
||||
tdisk->flags = GENHD_FL_EXT_DEVT;
|
||||
tdisk->major = 0;
|
||||
tdisk->first_minor = 0;
|
||||
tdisk->fops = tt->bops;
|
||||
tdisk->queue = tqueue;
|
||||
|
||||
targetdata = tt->init(tgt_dev, tdisk, create->flags);
|
||||
if (IS_ERR(targetdata)) {
|
||||
@ -396,14 +387,14 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
||||
}
|
||||
|
||||
tdisk->private_data = targetdata;
|
||||
tqueue->queuedata = targetdata;
|
||||
tdisk->queue->queuedata = targetdata;
|
||||
|
||||
mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
|
||||
if (dev->geo.mdts) {
|
||||
mdts = min_t(u32, dev->geo.mdts,
|
||||
(dev->geo.csecs >> 9) * NVM_MAX_VLBA);
|
||||
}
|
||||
blk_queue_max_hw_sectors(tqueue, mdts);
|
||||
blk_queue_max_hw_sectors(tdisk->queue, mdts);
|
||||
|
||||
set_capacity(tdisk, tt->capacity(targetdata));
|
||||
add_disk(tdisk);
|
||||
@ -428,10 +419,7 @@ err_sysfs:
|
||||
if (tt->exit)
|
||||
tt->exit(targetdata, true);
|
||||
err_init:
|
||||
blk_cleanup_queue(tqueue);
|
||||
tdisk->queue = NULL;
|
||||
err_disk:
|
||||
put_disk(tdisk);
|
||||
blk_cleanup_disk(tdisk);
|
||||
err_dev:
|
||||
nvm_remove_tgt_dev(tgt_dev, 0);
|
||||
err_t:
|
||||
@ -445,10 +433,8 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
|
||||
{
|
||||
struct nvm_tgt_type *tt = t->type;
|
||||
struct gendisk *tdisk = t->disk;
|
||||
struct request_queue *q = tdisk->queue;
|
||||
|
||||
del_gendisk(tdisk);
|
||||
blk_cleanup_queue(q);
|
||||
|
||||
if (tt->sysfs_exit)
|
||||
tt->sysfs_exit(tdisk);
|
||||
@ -457,7 +443,7 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
|
||||
tt->exit(tdisk->private_data, graceful);
|
||||
|
||||
nvm_remove_tgt_dev(t->dev, 1);
|
||||
put_disk(tdisk);
|
||||
blk_cleanup_disk(tdisk);
|
||||
module_put(t->type->owner);
|
||||
|
||||
list_del(&t->list);
|
||||
|
@ -890,13 +890,9 @@ static void bcache_device_free(struct bcache_device *d)
|
||||
if (disk_added)
|
||||
del_gendisk(disk);
|
||||
|
||||
if (disk->queue)
|
||||
blk_cleanup_queue(disk->queue);
|
||||
|
||||
blk_cleanup_disk(disk);
|
||||
ida_simple_remove(&bcache_device_idx,
|
||||
first_minor_to_idx(disk->first_minor));
|
||||
if (disk_added)
|
||||
put_disk(disk);
|
||||
}
|
||||
|
||||
bioset_exit(&d->bio_split);
|
||||
@ -946,7 +942,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
|
||||
goto err;
|
||||
|
||||
d->disk = alloc_disk(BCACHE_MINORS);
|
||||
d->disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!d->disk)
|
||||
goto err;
|
||||
|
||||
@ -955,14 +951,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
|
||||
d->disk->major = bcache_major;
|
||||
d->disk->first_minor = idx_to_first_minor(idx);
|
||||
d->disk->minors = BCACHE_MINORS;
|
||||
d->disk->fops = ops;
|
||||
d->disk->private_data = d;
|
||||
|
||||
q = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
|
||||
d->disk->queue = q;
|
||||
q = d->disk->queue;
|
||||
q->limits.max_hw_sectors = UINT_MAX;
|
||||
q->limits.max_sectors = UINT_MAX;
|
||||
q->limits.max_segment_size = UINT_MAX;
|
||||
|
@ -530,7 +530,6 @@ static const struct blk_mq_ops dm_mq_ops = {
|
||||
|
||||
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct dm_target *immutable_tgt;
|
||||
int err;
|
||||
|
||||
@ -557,12 +556,10 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
|
||||
if (err)
|
||||
goto out_kfree_tag_set;
|
||||
|
||||
q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
|
||||
if (IS_ERR(q)) {
|
||||
err = PTR_ERR(q);
|
||||
err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
|
||||
if (err)
|
||||
goto out_tag_set;
|
||||
}
|
||||
|
||||
elevator_init_mq(md->queue);
|
||||
return 0;
|
||||
|
||||
out_tag_set:
|
||||
|
@ -1801,13 +1801,13 @@ static void cleanup_mapped_device(struct mapped_device *md)
|
||||
md->disk->private_data = NULL;
|
||||
spin_unlock(&_minor_lock);
|
||||
del_gendisk(md->disk);
|
||||
put_disk(md->disk);
|
||||
}
|
||||
|
||||
if (md->queue) {
|
||||
if (md->queue)
|
||||
dm_queue_destroy_keyslot_manager(md->queue);
|
||||
blk_cleanup_queue(md->queue);
|
||||
}
|
||||
|
||||
if (md->disk)
|
||||
blk_cleanup_disk(md->disk);
|
||||
|
||||
cleanup_srcu_struct(&md->io_barrier);
|
||||
|
||||
@ -1869,13 +1869,10 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
* established. If request-based table is loaded: blk-mq will
|
||||
* override accordingly.
|
||||
*/
|
||||
md->queue = blk_alloc_queue(numa_node_id);
|
||||
if (!md->queue)
|
||||
goto bad;
|
||||
|
||||
md->disk = alloc_disk_node(1, md->numa_node_id);
|
||||
md->disk = blk_alloc_disk(md->numa_node_id);
|
||||
if (!md->disk)
|
||||
goto bad;
|
||||
md->queue = md->disk->queue;
|
||||
|
||||
init_waitqueue_head(&md->wait);
|
||||
INIT_WORK(&md->work, dm_wq_work);
|
||||
@ -1888,6 +1885,7 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
|
||||
md->disk->major = _major;
|
||||
md->disk->first_minor = minor;
|
||||
md->disk->minors = 1;
|
||||
md->disk->fops = &dm_blk_dops;
|
||||
md->disk->queue = md->queue;
|
||||
md->disk->private_data = md;
|
||||
|
@ -5598,12 +5598,10 @@ static void md_free(struct kobject *ko)
|
||||
if (mddev->sysfs_level)
|
||||
sysfs_put(mddev->sysfs_level);
|
||||
|
||||
if (mddev->gendisk)
|
||||
if (mddev->gendisk) {
|
||||
del_gendisk(mddev->gendisk);
|
||||
if (mddev->queue)
|
||||
blk_cleanup_queue(mddev->queue);
|
||||
if (mddev->gendisk)
|
||||
put_disk(mddev->gendisk);
|
||||
blk_cleanup_disk(mddev->gendisk);
|
||||
}
|
||||
percpu_ref_exit(&mddev->writes_pending);
|
||||
|
||||
bioset_exit(&mddev->bio_set);
|
||||
@ -5711,20 +5709,13 @@ static int md_alloc(dev_t dev, char *name)
|
||||
goto abort;
|
||||
|
||||
error = -ENOMEM;
|
||||
mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!mddev->queue)
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
goto abort;
|
||||
|
||||
blk_set_stacking_limits(&mddev->queue->limits);
|
||||
|
||||
disk = alloc_disk(1 << shift);
|
||||
if (!disk) {
|
||||
blk_cleanup_queue(mddev->queue);
|
||||
mddev->queue = NULL;
|
||||
goto abort;
|
||||
}
|
||||
disk->major = MAJOR(mddev->unit);
|
||||
disk->first_minor = unit << shift;
|
||||
disk->minors = 1 << shift;
|
||||
if (name)
|
||||
strcpy(disk->disk_name, name);
|
||||
else if (partitioned)
|
||||
@ -5733,7 +5724,9 @@ static int md_alloc(dev_t dev, char *name)
|
||||
sprintf(disk->disk_name, "md%d", unit);
|
||||
disk->fops = &md_fops;
|
||||
disk->private_data = mddev;
|
||||
disk->queue = mddev->queue;
|
||||
|
||||
mddev->queue = disk->queue;
|
||||
blk_set_stacking_limits(&mddev->queue->limits);
|
||||
blk_queue_write_cache(mddev->queue, true, true);
|
||||
/* Allow extended partitions. This makes the
|
||||
* 'mdp' device redundant, but we can't really
|
||||
|
@@ -395,10 +395,10 @@ struct mddev {
* that we are never stopping an array while it is open.
* 'reconfig_mutex' protects all other reconfiguration.
* These locks are separate due to conflicting interactions
* with bdev->bd_mutex.
* with disk->open_mutex.
* Lock ordering is:
* reconfig_mutex -> bd_mutex
* bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
* reconfig_mutex -> disk->open_mutex
* disk->open_mutex -> open_mutex: e.g. __blkdev_get -> md_open
*/
struct mutex open_mutex;
struct mutex reconfig_mutex;
|
||||
|
@ -2120,21 +2120,17 @@ static int msb_init_disk(struct memstick_dev *card)
|
||||
if (msb->disk_id < 0)
|
||||
return msb->disk_id;
|
||||
|
||||
msb->disk = alloc_disk(0);
|
||||
if (!msb->disk) {
|
||||
rc = -ENOMEM;
|
||||
rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &msb_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (rc)
|
||||
goto out_release_id;
|
||||
}
|
||||
|
||||
msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &msb_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(msb->queue)) {
|
||||
rc = PTR_ERR(msb->queue);
|
||||
msb->queue = NULL;
|
||||
goto out_put_disk;
|
||||
msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
|
||||
if (IS_ERR(msb->disk)) {
|
||||
rc = PTR_ERR(msb->disk);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
|
||||
msb->queue->queuedata = card;
|
||||
msb->queue = msb->disk->queue;
|
||||
|
||||
blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
|
||||
blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
|
||||
@ -2145,8 +2141,6 @@ static int msb_init_disk(struct memstick_dev *card)
|
||||
sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id);
|
||||
msb->disk->fops = &msb_bdops;
|
||||
msb->disk->private_data = msb;
|
||||
msb->disk->queue = msb->queue;
|
||||
msb->disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
|
||||
capacity = msb->pages_in_block * msb->logical_block_count;
|
||||
capacity *= (msb->page_size / 512);
|
||||
@ -2166,8 +2160,8 @@ static int msb_init_disk(struct memstick_dev *card)
|
||||
dbg("Disk added");
|
||||
return 0;
|
||||
|
||||
out_put_disk:
|
||||
put_disk(msb->disk);
|
||||
out_free_tag_set:
|
||||
blk_mq_free_tag_set(&msb->tag_set);
|
||||
out_release_id:
|
||||
mutex_lock(&msb_disk_lock);
|
||||
idr_remove(&msb_disk_idr, msb->disk_id);
|
||||
|
@ -1205,21 +1205,17 @@ static int mspro_block_init_disk(struct memstick_dev *card)
|
||||
if (disk_id < 0)
|
||||
return disk_id;
|
||||
|
||||
msb->disk = alloc_disk(1 << MSPRO_BLOCK_PART_SHIFT);
|
||||
if (!msb->disk) {
|
||||
rc = -ENOMEM;
|
||||
rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &mspro_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (rc)
|
||||
goto out_release_id;
|
||||
}
|
||||
|
||||
msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &mspro_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(msb->queue)) {
|
||||
rc = PTR_ERR(msb->queue);
|
||||
msb->queue = NULL;
|
||||
goto out_put_disk;
|
||||
msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
|
||||
if (IS_ERR(msb->disk)) {
|
||||
rc = PTR_ERR(msb->disk);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
|
||||
msb->queue->queuedata = card;
|
||||
msb->queue = msb->disk->queue;
|
||||
|
||||
blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
|
||||
blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
|
||||
@ -1228,10 +1224,10 @@ static int mspro_block_init_disk(struct memstick_dev *card)
|
||||
|
||||
msb->disk->major = major;
|
||||
msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT;
|
||||
msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT;
|
||||
msb->disk->fops = &ms_block_bdops;
|
||||
msb->usage_count = 1;
|
||||
msb->disk->private_data = msb;
|
||||
msb->disk->queue = msb->queue;
|
||||
|
||||
sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
|
||||
|
||||
@ -1247,8 +1243,8 @@ static int mspro_block_init_disk(struct memstick_dev *card)
|
||||
msb->active = 1;
|
||||
return 0;
|
||||
|
||||
out_put_disk:
|
||||
put_disk(msb->disk);
|
||||
out_free_tag_set:
|
||||
blk_mq_free_tag_set(&msb->tag_set);
|
||||
out_release_id:
|
||||
mutex_lock(&mspro_block_disk_lock);
|
||||
idr_remove(&mspro_block_disk_idr, disk_id);
|
||||
|
@ -30,11 +30,9 @@ static void blktrans_dev_release(struct kref *kref)
|
||||
struct mtd_blktrans_dev *dev =
|
||||
container_of(kref, struct mtd_blktrans_dev, ref);
|
||||
|
||||
dev->disk->private_data = NULL;
|
||||
blk_cleanup_queue(dev->rq);
|
||||
blk_cleanup_disk(dev->disk);
|
||||
blk_mq_free_tag_set(dev->tag_set);
|
||||
kfree(dev->tag_set);
|
||||
put_disk(dev->disk);
|
||||
list_del(&dev->list);
|
||||
kfree(dev);
|
||||
}
|
||||
@ -354,7 +352,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
if (new->devnum > (MINORMASK >> tr->part_bits) ||
|
||||
(tr->part_bits && new->devnum >= 27 * 26)) {
|
||||
mutex_unlock(&blktrans_ref_mutex);
|
||||
goto error1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
list_add_tail(&new->list, &tr->devs);
|
||||
@ -366,17 +364,29 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
if (!tr->writesect)
|
||||
new->readonly = 1;
|
||||
|
||||
/* Create gendisk */
|
||||
ret = -ENOMEM;
|
||||
gd = alloc_disk(1 << tr->part_bits);
|
||||
new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL);
|
||||
if (!new->tag_set)
|
||||
goto out_list_del;
|
||||
|
||||
if (!gd)
|
||||
goto error2;
|
||||
ret = blk_mq_alloc_sq_tag_set(new->tag_set, &mtd_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
|
||||
if (ret)
|
||||
goto out_kfree_tag_set;
|
||||
|
||||
/* Create gendisk */
|
||||
gd = blk_mq_alloc_disk(new->tag_set, new);
|
||||
if (IS_ERR(gd)) {
|
||||
ret = PTR_ERR(gd);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
|
||||
new->disk = gd;
|
||||
new->rq = new->disk->queue;
|
||||
gd->private_data = new;
|
||||
gd->major = tr->major;
|
||||
gd->first_minor = (new->devnum) << tr->part_bits;
|
||||
gd->minors = 1 << tr->part_bits;
|
||||
gd->fops = &mtd_block_ops;
|
||||
|
||||
if (tr->part_bits)
|
||||
@ -398,22 +408,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
spin_lock_init(&new->queue_lock);
|
||||
INIT_LIST_HEAD(&new->rq_list);
|
||||
|
||||
new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL);
|
||||
if (!new->tag_set)
|
||||
goto error3;
|
||||
|
||||
new->rq = blk_mq_init_sq_queue(new->tag_set, &mtd_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
|
||||
if (IS_ERR(new->rq)) {
|
||||
ret = PTR_ERR(new->rq);
|
||||
new->rq = NULL;
|
||||
goto error4;
|
||||
}
|
||||
|
||||
if (tr->flush)
|
||||
blk_queue_write_cache(new->rq, true, false);
|
||||
|
||||
new->rq->queuedata = new;
|
||||
blk_queue_logical_block_size(new->rq, tr->blksize);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
|
||||
@ -437,13 +434,13 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
WARN_ON(ret);
|
||||
}
|
||||
return 0;
|
||||
error4:
|
||||
|
||||
out_free_tag_set:
|
||||
blk_mq_free_tag_set(new->tag_set);
|
||||
out_kfree_tag_set:
|
||||
kfree(new->tag_set);
|
||||
error3:
|
||||
put_disk(new->disk);
|
||||
error2:
|
||||
out_list_del:
|
||||
list_del(&new->list);
|
||||
error1:
|
||||
return ret;
|
||||
}
|
||||
|
||||
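mtd_blkdevs keeps its tag set on the heap, so the hunks above mainly reorder allocation: the tag set must exist before blk_mq_alloc_disk(), and the unwind labels mirror that order. A sketch of the heap-allocated variant; my_blktrans_dev and my_mq_ops are stand-ins, not the mtd structures themselves::

  #include <linux/blk-mq.h>
  #include <linux/slab.h>
  #include <linux/err.h>

  static const struct blk_mq_ops my_mq_ops;  /* .queue_rq omitted in this sketch */

  struct my_blktrans_dev {                   /* hypothetical, loosely mirrors mtd_blktrans_dev */
      struct blk_mq_tag_set *tag_set;
      struct gendisk *disk;
      struct request_queue *rq;
  };

  static int my_add_dev(struct my_blktrans_dev *new)
  {
      int ret;

      new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL);
      if (!new->tag_set)
          return -ENOMEM;

      ret = blk_mq_alloc_sq_tag_set(new->tag_set, &my_mq_ops, 2,
                                    BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
      if (ret)
          goto out_kfree_tag_set;

      new->disk = blk_mq_alloc_disk(new->tag_set, new);
      if (IS_ERR(new->disk)) {
          ret = PTR_ERR(new->disk);
          goto out_free_tag_set;
      }
      new->rq = new->disk->queue;
      return 0;

  out_free_tag_set:
      blk_mq_free_tag_set(new->tag_set);
  out_kfree_tag_set:
      kfree(new->tag_set);
      return ret;
  }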
|
@ -394,28 +394,6 @@ int ubiblock_create(struct ubi_volume_info *vi)
|
||||
dev->vol_id = vi->vol_id;
|
||||
dev->leb_size = vi->usable_leb_size;
|
||||
|
||||
/* Initialize the gendisk of this ubiblock device */
|
||||
gd = alloc_disk(1);
|
||||
if (!gd) {
|
||||
pr_err("UBI: block: alloc_disk failed\n");
|
||||
ret = -ENODEV;
|
||||
goto out_free_dev;
|
||||
}
|
||||
|
||||
gd->fops = &ubiblock_ops;
|
||||
gd->major = ubiblock_major;
|
||||
gd->first_minor = idr_alloc(&ubiblock_minor_idr, dev, 0, 0, GFP_KERNEL);
|
||||
if (gd->first_minor < 0) {
|
||||
dev_err(disk_to_dev(gd),
|
||||
"block: dynamic minor allocation failed");
|
||||
ret = -ENODEV;
|
||||
goto out_put_disk;
|
||||
}
|
||||
gd->private_data = dev;
|
||||
sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
|
||||
set_capacity(gd, disk_capacity);
|
||||
dev->gd = gd;
|
||||
|
||||
dev->tag_set.ops = &ubiblock_mq_ops;
|
||||
dev->tag_set.queue_depth = 64;
|
||||
dev->tag_set.numa_node = NUMA_NO_NODE;
|
||||
@ -427,19 +405,34 @@ int ubiblock_create(struct ubi_volume_info *vi)
|
||||
ret = blk_mq_alloc_tag_set(&dev->tag_set);
|
||||
if (ret) {
|
||||
dev_err(disk_to_dev(dev->gd), "blk_mq_alloc_tag_set failed");
|
||||
goto out_remove_minor;
|
||||
goto out_free_dev;;
|
||||
}
|
||||
|
||||
dev->rq = blk_mq_init_queue(&dev->tag_set);
|
||||
if (IS_ERR(dev->rq)) {
|
||||
dev_err(disk_to_dev(gd), "blk_mq_init_queue failed");
|
||||
ret = PTR_ERR(dev->rq);
|
||||
|
||||
/* Initialize the gendisk of this ubiblock device */
|
||||
gd = blk_mq_alloc_disk(&dev->tag_set, dev);
|
||||
if (IS_ERR(gd)) {
|
||||
ret = PTR_ERR(gd);
|
||||
goto out_free_tags;
|
||||
}
|
||||
blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT);
|
||||
|
||||
dev->rq->queuedata = dev;
|
||||
dev->gd->queue = dev->rq;
|
||||
gd->fops = &ubiblock_ops;
|
||||
gd->major = ubiblock_major;
|
||||
gd->minors = 1;
|
||||
gd->first_minor = idr_alloc(&ubiblock_minor_idr, dev, 0, 0, GFP_KERNEL);
|
||||
if (gd->first_minor < 0) {
|
||||
dev_err(disk_to_dev(gd),
|
||||
"block: dynamic minor allocation failed");
|
||||
ret = -ENODEV;
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
gd->private_data = dev;
|
||||
sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
|
||||
set_capacity(gd, disk_capacity);
|
||||
dev->gd = gd;
|
||||
|
||||
dev->rq = gd->queue;
|
||||
blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT);
|
||||
|
||||
/*
|
||||
* Create one workqueue per volume (per registered block device).
|
||||
@ -448,7 +441,7 @@ int ubiblock_create(struct ubi_volume_info *vi)
|
||||
dev->wq = alloc_workqueue("%s", 0, 0, gd->disk_name);
|
||||
if (!dev->wq) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_queue;
|
||||
goto out_remove_minor;
|
||||
}
|
||||
|
||||
list_add_tail(&dev->list, &ubiblock_devices);
|
||||
@ -460,14 +453,12 @@ int ubiblock_create(struct ubi_volume_info *vi)
|
||||
mutex_unlock(&devices_mutex);
|
||||
return 0;
|
||||
|
||||
out_free_queue:
|
||||
blk_cleanup_queue(dev->rq);
|
||||
out_free_tags:
|
||||
blk_mq_free_tag_set(&dev->tag_set);
|
||||
out_remove_minor:
|
||||
idr_remove(&ubiblock_minor_idr, gd->first_minor);
|
||||
out_put_disk:
|
||||
put_disk(dev->gd);
|
||||
out_cleanup_disk:
|
||||
blk_cleanup_disk(dev->gd);
|
||||
out_free_tags:
|
||||
blk_mq_free_tag_set(&dev->tag_set);
|
||||
out_free_dev:
|
||||
kfree(dev);
|
||||
out_unlock:
|
||||
@ -483,11 +474,10 @@ static void ubiblock_cleanup(struct ubiblock *dev)
|
||||
/* Flush pending work */
|
||||
destroy_workqueue(dev->wq);
|
||||
/* Finally destroy the blk queue */
|
||||
blk_cleanup_queue(dev->rq);
|
||||
blk_mq_free_tag_set(&dev->tag_set);
|
||||
dev_info(disk_to_dev(dev->gd), "released");
|
||||
blk_cleanup_disk(dev->gd);
|
||||
blk_mq_free_tag_set(&dev->tag_set);
|
||||
idr_remove(&ubiblock_minor_idr, dev->gd->first_minor);
|
||||
put_disk(dev->gd);
|
||||
}
|
||||
|
||||
int ubiblock_remove(struct ubi_volume_info *vi)
|
||||
|
@ -228,49 +228,34 @@ static const struct block_device_operations nd_blk_fops = {
|
||||
.submit_bio = nd_blk_submit_bio,
|
||||
};
|
||||
|
||||
static void nd_blk_release_queue(void *q)
|
||||
{
|
||||
blk_cleanup_queue(q);
|
||||
}
|
||||
|
||||
static void nd_blk_release_disk(void *disk)
|
||||
{
|
||||
del_gendisk(disk);
|
||||
put_disk(disk);
|
||||
blk_cleanup_disk(disk);
|
||||
}
|
||||
|
||||
static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
|
||||
{
|
||||
struct device *dev = &nsblk->common.dev;
|
||||
resource_size_t available_disk_size;
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
u64 internal_nlba;
|
||||
|
||||
internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
|
||||
available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
|
||||
|
||||
q = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
|
||||
return -ENOMEM;
|
||||
|
||||
blk_queue_max_hw_sectors(q, UINT_MAX);
|
||||
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
|
||||
disk = alloc_disk(0);
|
||||
disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
|
||||
disk->first_minor = 0;
|
||||
disk->fops = &nd_blk_fops;
|
||||
disk->queue = q;
|
||||
disk->flags = GENHD_FL_EXT_DEVT;
|
||||
disk->private_data = nsblk;
|
||||
nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
|
||||
|
||||
blk_queue_max_hw_sectors(disk->queue, UINT_MAX);
|
||||
blk_queue_logical_block_size(disk->queue, nsblk_sector_size(nsblk));
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
|
||||
if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
|
||||
return -ENOMEM;
|
||||
|
||||
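The libnvdimm hunks above fold the separate devm-managed queue release into a single action that runs del_gendisk() and blk_cleanup_disk() together. A minimal devm-managed sketch under those assumptions; the function names and the placement of the release action are illustrative, not the exact nd_blk code::

  #include <linux/blkdev.h>
  #include <linux/device.h>

  static const struct block_device_operations my_bio_fops; /* placeholder fops */

  static void my_release_disk(void *disk)
  {
      del_gendisk(disk);
      blk_cleanup_disk(disk);
  }

  static int my_attach_disk(struct device *dev)
  {
      struct gendisk *disk = blk_alloc_disk(NUMA_NO_NODE);

      if (!disk)
          return -ENOMEM;

      disk->fops = &my_bio_fops;
      sprintf(disk->disk_name, "mydisk0");
      blk_queue_max_hw_sectors(disk->queue, UINT_MAX);
      set_capacity(disk, 0);

      device_add_disk(dev, disk, NULL);
      /* Tie teardown to the device's lifetime; on failure the
       * action runs immediately and cleans the disk up. */
      return devm_add_action_or_reset(dev, my_release_disk, disk);
  }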
|
@ -1521,35 +1521,25 @@ static int btt_blk_init(struct btt *btt)
|
||||
struct nd_btt *nd_btt = btt->nd_btt;
|
||||
struct nd_namespace_common *ndns = nd_btt->ndns;
|
||||
|
||||
/* create a new disk and request queue for btt */
|
||||
btt->btt_queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (!btt->btt_queue)
|
||||
btt->btt_disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (!btt->btt_disk)
|
||||
return -ENOMEM;
|
||||
|
||||
btt->btt_disk = alloc_disk(0);
|
||||
if (!btt->btt_disk) {
|
||||
blk_cleanup_queue(btt->btt_queue);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
|
||||
btt->btt_disk->first_minor = 0;
|
||||
btt->btt_disk->fops = &btt_fops;
|
||||
btt->btt_disk->private_data = btt;
|
||||
btt->btt_disk->queue = btt->btt_queue;
|
||||
btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
|
||||
|
||||
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
|
||||
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
|
||||
blk_queue_logical_block_size(btt->btt_disk->queue, btt->sector_size);
|
||||
blk_queue_max_hw_sectors(btt->btt_disk->queue, UINT_MAX);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
|
||||
|
||||
if (btt_meta_size(btt)) {
|
||||
int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
|
||||
|
||||
if (rc) {
|
||||
del_gendisk(btt->btt_disk);
|
||||
put_disk(btt->btt_disk);
|
||||
blk_cleanup_queue(btt->btt_queue);
|
||||
blk_cleanup_disk(btt->btt_disk);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
@ -1564,8 +1554,7 @@ static int btt_blk_init(struct btt *btt)
|
||||
static void btt_blk_cleanup(struct btt *btt)
|
||||
{
|
||||
del_gendisk(btt->btt_disk);
|
||||
put_disk(btt->btt_disk);
|
||||
blk_cleanup_queue(btt->btt_queue);
|
||||
blk_cleanup_disk(btt->btt_disk);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -201,7 +201,6 @@ struct badblocks;
|
||||
/**
|
||||
* struct btt - handle for a BTT instance
|
||||
* @btt_disk: Pointer to the gendisk for BTT device
|
||||
* @btt_queue: Pointer to the request queue for the BTT device
|
||||
* @arena_list: Head of the list of arenas
|
||||
* @debugfs_dir: Debugfs dentry
|
||||
* @nd_btt: Parent nd_btt struct
|
||||
@ -219,7 +218,6 @@ struct badblocks;
|
||||
*/
|
||||
struct btt {
|
||||
struct gendisk *btt_disk;
|
||||
struct request_queue *btt_queue;
|
||||
struct list_head arena_list;
|
||||
struct dentry *debugfs_dir;
|
||||
struct nd_btt *nd_btt;
|
||||
|
@ -335,10 +335,9 @@ static const struct attribute_group *pmem_attribute_groups[] = {
|
||||
|
||||
static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
|
||||
{
|
||||
struct request_queue *q =
|
||||
container_of(pgmap->ref, struct request_queue, q_usage_counter);
|
||||
struct pmem_device *pmem = pgmap->owner;
|
||||
|
||||
blk_cleanup_queue(q);
|
||||
blk_cleanup_disk(pmem->disk);
|
||||
}
|
||||
|
||||
static void pmem_release_queue(void *pgmap)
|
||||
@ -361,7 +360,6 @@ static void pmem_release_disk(void *__pmem)
|
||||
kill_dax(pmem->dax_dev);
|
||||
put_dax(pmem->dax_dev);
|
||||
del_gendisk(pmem->disk);
|
||||
put_disk(pmem->disk);
|
||||
}
|
||||
|
||||
static const struct dev_pagemap_ops fsdax_pagemap_ops = {
|
||||
@ -422,10 +420,13 @@ static int pmem_attach_disk(struct device *dev,
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
q = blk_alloc_queue(dev_to_node(dev));
|
||||
if (!q)
|
||||
disk = blk_alloc_disk(nid);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
q = disk->queue;
|
||||
|
||||
pmem->disk = disk;
|
||||
pmem->pgmap.owner = pmem;
|
||||
pmem->pfn_flags = PFN_DEV;
|
||||
pmem->pgmap.ref = &q->q_usage_counter;
|
||||
if (is_nd_pfn(dev)) {
|
||||
@ -470,14 +471,7 @@ static int pmem_attach_disk(struct device *dev,
|
||||
if (pmem->pfn_flags & PFN_MAP)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
|
||||
|
||||
disk = alloc_disk_node(0, nid);
|
||||
if (!disk)
|
||||
return -ENOMEM;
|
||||
pmem->disk = disk;
|
||||
|
||||
disk->fops = &pmem_fops;
|
||||
disk->queue = q;
|
||||
disk->flags = GENHD_FL_EXT_DEVT;
|
||||
disk->private_data = pmem;
|
||||
nvdimm_namespace_disk_name(ndns, disk->disk_name);
|
||||
set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
|
||||
@ -491,7 +485,6 @@ static int pmem_attach_disk(struct device *dev,
|
||||
flags = DAXDEV_F_SYNC;
|
||||
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
|
||||
if (IS_ERR(dax_dev)) {
|
||||
put_disk(disk);
|
||||
return PTR_ERR(dax_dev);
|
||||
}
|
||||
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
|
||||
|
@ -3701,7 +3701,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
disk->fops = &nvme_bdev_ops;
|
||||
disk->private_data = ns;
|
||||
disk->queue = ns->queue;
|
||||
disk->flags = GENHD_FL_EXT_DEVT;
|
||||
/*
|
||||
* Without the multipath code enabled, multiple controller per
|
||||
* subsystems are visible as devices and thus we cannot use the
|
||||
|
@@ -427,7 +427,6 @@ static void nvme_requeue_work(struct work_struct *work)

int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
struct request_queue *q;
bool vwc = false;

mutex_init(&head->lock);
@@ -443,34 +442,24 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
return 0;

q = blk_alloc_queue(ctrl->numa_node);
if (!q)
goto out;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
blk_set_stacking_limits(&q->limits);
head->disk = blk_alloc_disk(ctrl->numa_node);
if (!head->disk)
return -ENOMEM;
head->disk->fops = &nvme_ns_head_ops;
head->disk->private_data = head;
sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance);

blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
/* set to a default value of 512 until the disk is validated */
blk_queue_logical_block_size(head->disk->queue, 512);
blk_set_stacking_limits(&head->disk->queue->limits);

/* we need to propagate up the VMC settings */
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
blk_queue_write_cache(q, vwc, vwc);

head->disk = alloc_disk(0);
if (!head->disk)
goto out_cleanup_queue;
head->disk->fops = &nvme_ns_head_ops;
head->disk->private_data = head;
head->disk->queue = q;
head->disk->flags = GENHD_FL_EXT_DEVT;
sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance);
blk_queue_write_cache(head->disk->queue, vwc, vwc);
return 0;

out_cleanup_queue:
blk_cleanup_queue(q);
out:
return -ENOMEM;
}

static void nvme_mpath_set_live(struct nvme_ns *ns)
@@ -769,16 +758,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
blk_cleanup_queue(head->disk->queue);
if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
/*
* if device_add_disk wasn't called, prevent
* disk release to put a bogus reference on the
* request queue
*/
head->disk->queue = NULL;
}
put_disk(head->disk);
blk_cleanup_disk(head->disk);
}

void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
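The nvme multipath conversion above applies the same idea to a stacked bio-based node: queue limits are set through head->disk->queue and the separate out_cleanup_queue unwinding disappears. Roughly, with hypothetical bar_* names (bar_head, bar_head_ops and the instance argument are assumptions of the sketch)::

    static int bar_alloc_head_disk(struct bar_head *head, int node, int instance)
    {
        head->disk = blk_alloc_disk(node);
        if (!head->disk)
            return -ENOMEM;

        head->disk->fops = &bar_head_ops;
        head->disk->private_data = head;
        snprintf(head->disk->disk_name, DISK_NAME_LEN, "bar%d", instance);

        /* limits now live on the embedded queue */
        blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
        blk_queue_logical_block_size(head->disk->queue, 512);
        blk_set_stacking_limits(&head->disk->queue->limits);
        return 0;
    }

    static void bar_free_head_disk(struct bar_head *head)
    {
        /* both the error path and teardown collapse to one call */
        blk_cleanup_disk(head->disk);
    }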
@@ -109,9 +109,9 @@ int dasd_scan_partitions(struct dasd_block *block)
return -ENODEV;
}

mutex_lock(&bdev->bd_mutex);
rc = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
mutex_lock(&block->gdp->open_mutex);
rc = bdev_disk_changed(block->gdp, false);
mutex_unlock(&block->gdp->open_mutex);
if (rc)
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, rc %d", rc);
@@ -145,9 +145,9 @@ void dasd_destroy_partitions(struct dasd_block *block)
bdev = block->bdev;
block->bdev = NULL;

mutex_lock(&bdev->bd_mutex);
bdev_disk_changed(bdev, true);
mutex_unlock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
bdev_disk_changed(bdev->bd_disk, true);
mutex_unlock(&bdev->bd_disk->open_mutex);

/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
blkdev_put(bdev, FMODE_READ);
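The dasd hunks show the changed calling convention for bdev_disk_changed(): it now takes the gendisk and runs under disk->open_mutex instead of taking a block_device under bd_mutex. Schematically (illustration only; the comment kept alongside its export in this diff says the function remains only for loop and dasd and should not gain new users)::

    static int example_rescan_partitions(struct gendisk *disk)
    {
        int rc;

        mutex_lock(&disk->open_mutex);
        rc = bdev_disk_changed(disk, false);    /* false: do not invalidate */
        mutex_unlock(&disk->open_mutex);
        return rc;
    }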
@@ -90,7 +90,6 @@ struct dcssblk_dev_info {
int segment_type;
unsigned char save_pending;
unsigned char is_shared;
struct request_queue *dcssblk_queue;
int num_of_segments;
struct list_head seg_list;
struct dax_device *dax_dev;
@@ -429,9 +428,7 @@ removeseg:
kill_dax(dev_info->dax_dev);
put_dax(dev_info->dax_dev);
del_gendisk(dev_info->gd);
blk_cleanup_queue(dev_info->dcssblk_queue);
dev_info->gd->queue = NULL;
put_disk(dev_info->gd);
blk_cleanup_disk(dev_info->gd);
up_write(&dcssblk_devices_sem);

if (device_remove_file_self(dev, attr)) {
@@ -644,18 +641,17 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
dev_info->dev.release = dcssblk_release_segment;
dev_info->dev.groups = dcssblk_dev_attr_groups;
INIT_LIST_HEAD(&dev_info->lh);
dev_info->gd = alloc_disk(DCSSBLK_MINORS_PER_DISK);
dev_info->gd = blk_alloc_disk(NUMA_NO_NODE);
if (dev_info->gd == NULL) {
rc = -ENOMEM;
goto seg_list_del;
}
dev_info->gd->major = dcssblk_major;
dev_info->gd->minors = DCSSBLK_MINORS_PER_DISK;
dev_info->gd->fops = &dcssblk_devops;
dev_info->dcssblk_queue = blk_alloc_queue(NUMA_NO_NODE);
dev_info->gd->queue = dev_info->dcssblk_queue;
dev_info->gd->private_data = dev_info;
blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue);
blk_queue_logical_block_size(dev_info->gd->queue, 4096);
blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue);

seg_byte_size = (dev_info->end - dev_info->start + 1);
set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
@@ -719,9 +715,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char

put_dev:
list_del(&dev_info->lh);
blk_cleanup_queue(dev_info->dcssblk_queue);
dev_info->gd->queue = NULL;
put_disk(dev_info->gd);
blk_cleanup_disk(dev_info->gd);
list_for_each_entry(seg_info, &dev_info->seg_list, lh) {
segment_unload(seg_info->segment_name);
}
@@ -731,9 +725,7 @@ put_dev:
dev_list_del:
list_del(&dev_info->lh);
release_gd:
blk_cleanup_queue(dev_info->dcssblk_queue);
dev_info->gd->queue = NULL;
put_disk(dev_info->gd);
blk_cleanup_disk(dev_info->gd);
up_write(&dcssblk_devices_sem);
seg_list_del:
if (dev_info == NULL)
@@ -801,9 +793,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
kill_dax(dev_info->dax_dev);
put_dax(dev_info->dax_dev);
del_gendisk(dev_info->gd);
blk_cleanup_queue(dev_info->dcssblk_queue);
dev_info->gd->queue = NULL;
put_disk(dev_info->gd);
blk_cleanup_disk(dev_info->gd);

/* unload all related segments */
list_for_each_entry(entry, &dev_info->seg_list, lh)
@@ -462,12 +462,12 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
if (ret)
goto out;

rq = blk_mq_init_queue(&bdev->tag_set);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, scmdev);
if (IS_ERR(bdev->gendisk)) {
ret = PTR_ERR(bdev->gendisk);
goto out_tag;
}
bdev->rq = rq;
rq = bdev->rq = bdev->gendisk->queue;
nr_max_blk = min(scmdev->nr_max_block,
(unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

@@ -477,17 +477,11 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);

bdev->gendisk = alloc_disk(SCM_NR_PARTS);
if (!bdev->gendisk) {
ret = -ENOMEM;
goto out_queue;
}
rq->queuedata = scmdev;
bdev->gendisk->private_data = scmdev;
bdev->gendisk->fops = &scm_blk_devops;
bdev->gendisk->queue = rq;
bdev->gendisk->major = scm_major;
bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;
bdev->gendisk->minors = SCM_NR_PARTS;

len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
if (devindex > 25) {
@@ -504,8 +498,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
return 0;

out_queue:
blk_cleanup_queue(rq);
out_tag:
blk_mq_free_tag_set(&bdev->tag_set);
out:
@@ -516,9 +508,8 @@ out:
void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
del_gendisk(bdev->gendisk);
blk_cleanup_queue(bdev->gendisk->queue);
blk_cleanup_disk(bdev->gendisk);
blk_mq_free_tag_set(&bdev->tag_set);
put_disk(bdev->gendisk);
}

void scm_blk_set_available(struct scm_blk_dev *bdev)
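For request-based drivers the scm_blk hunks use the blk-mq flavour of the same conversion: blk_mq_alloc_disk() builds the gendisk and its request queue from an already initialised tag set, so the blk_mq_init_queue()/alloc_disk() pair and the out_queue error label go away. A sketch with hypothetical baz_* names (the tag set fields and baz_fops are assumed to be filled in elsewhere)::

    struct baz_dev {
        struct blk_mq_tag_set tag_set;
        struct gendisk *gendisk;
    };

    static int baz_setup_disk(struct baz_dev *baz, struct device *parent)
    {
        int ret;

        ret = blk_mq_alloc_tag_set(&baz->tag_set);
        if (ret)
            return ret;

        /* the second argument becomes queue->queuedata */
        baz->gendisk = blk_mq_alloc_disk(&baz->tag_set, baz);
        if (IS_ERR(baz->gendisk)) {
            ret = PTR_ERR(baz->gendisk);
            goto out_free_tag_set;
        }
        baz->gendisk->fops = &baz_fops;
        baz->gendisk->private_data = baz;
        snprintf(baz->gendisk->disk_name, DISK_NAME_LEN, "baz0");

        device_add_disk(parent, baz->gendisk, NULL);
        return 0;

    out_free_tag_set:
        blk_mq_free_tag_set(&baz->tag_set);
        return ret;
    }

    static void baz_cleanup_disk(struct baz_dev *baz)
    {
        del_gendisk(baz->gendisk);
        blk_cleanup_disk(baz->gendisk);
        blk_mq_free_tag_set(&baz->tag_set);
    }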
@@ -56,7 +56,6 @@ typedef struct {
static xpram_device_t xpram_devices[XPRAM_MAX_DEVS];
static unsigned int xpram_sizes[XPRAM_MAX_DEVS];
static struct gendisk *xpram_disks[XPRAM_MAX_DEVS];
static struct request_queue *xpram_queues[XPRAM_MAX_DEVS];
static unsigned int xpram_pages;
static int xpram_devs;

@@ -341,17 +340,13 @@ static int __init xpram_setup_blkdev(void)
int i, rc = -ENOMEM;

for (i = 0; i < xpram_devs; i++) {
xpram_disks[i] = alloc_disk(1);
xpram_disks[i] = blk_alloc_disk(NUMA_NO_NODE);
if (!xpram_disks[i])
goto out;
xpram_queues[i] = blk_alloc_queue(NUMA_NO_NODE);
if (!xpram_queues[i]) {
put_disk(xpram_disks[i]);
goto out;
}
blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]);
blk_queue_logical_block_size(xpram_queues[i], 4096);
blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_disks[i]->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM,
xpram_disks[i]->queue);
blk_queue_logical_block_size(xpram_disks[i]->queue, 4096);
}

/*
@@ -373,9 +368,9 @@ static int __init xpram_setup_blkdev(void)
offset += xpram_devices[i].size;
disk->major = XPRAM_MAJOR;
disk->first_minor = i;
disk->minors = 1;
disk->fops = &xpram_devops;
disk->private_data = &xpram_devices[i];
disk->queue = xpram_queues[i];
sprintf(disk->disk_name, "slram%d", i);
set_capacity(disk, xpram_sizes[i] << 1);
add_disk(disk);
@@ -383,10 +378,8 @@ static int __init xpram_setup_blkdev(void)

return 0;
out:
while (i--) {
blk_cleanup_queue(xpram_queues[i]);
put_disk(xpram_disks[i]);
}
while (i--)
blk_cleanup_disk(xpram_disks[i]);
return rc;
}

@@ -434,8 +427,7 @@ static void __exit xpram_exit(void)
int i;
for (i = 0; i < xpram_devs; i++) {
del_gendisk(xpram_disks[i]);
blk_cleanup_queue(xpram_queues[i]);
put_disk(xpram_disks[i]);
blk_cleanup_disk(xpram_disks[i]);
}
unregister_blkdev(XPRAM_MAJOR, XPRAM_NAME);
platform_device_unregister(xpram_pdev);
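As the xpram and dcssblk hunks show, drivers that keep a statically registered major still fill in the device numbering themselves after blk_alloc_disk(); only the separate request_queue bookkeeping goes away. Illustrative sketch (QUX_MAJOR and qux_fops are not from this diff)::

    static struct gendisk *qux_alloc_disk(int index)
    {
        struct gendisk *disk;

        disk = blk_alloc_disk(NUMA_NO_NODE);
        if (!disk)
            return NULL;

        disk->major = QUX_MAJOR;
        disk->first_minor = index;
        disk->minors = 1;
        disk->fops = &qux_fops;
        sprintf(disk->disk_name, "qux%d", index);
        blk_queue_logical_block_size(disk->queue, 4096);
        return disk;
    }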
@@ -1416,7 +1416,7 @@ static bool sd_need_revalidate(struct block_device *bdev,
* In the latter case @inode and @filp carry an abridged amount
* of information as noted above.
*
* Locking: called with bdev->bd_mutex held.
* Locking: called with bdev->bd_disk->open_mutex held.
**/
static int sd_open(struct block_device *bdev, fmode_t mode)
{
@@ -1490,7 +1490,7 @@ error_out:
* Note: may block (uninterruptible) if error recovery is underway
* on this disk.
*
* Locking: called with bdev->bd_mutex held.
* Locking: called with bdev->bd_disk->open_mutex held.
**/
static void sd_release(struct gendisk *disk, fmode_t mode)
{
fs/block_dev.c (252 changed lines)
@@ -895,7 +895,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
mapping_set_gfp_mask(&inode->i_data, GFP_USER);

bdev = I_BDEV(inode);
mutex_init(&bdev->bd_mutex);
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
bdev->bd_disk = disk;
@@ -1154,7 +1153,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
struct bd_holder_disk *holder;
int ret = 0;

mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);

WARN_ON_ONCE(!bdev->bd_holder);

@@ -1199,7 +1198,7 @@ out_del:
out_free:
kfree(holder);
out_unlock:
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
@@ -1218,7 +1217,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
struct bd_holder_disk *holder;

mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);

holder = bd_find_holder_disk(bdev, disk);

@@ -1230,138 +1229,97 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
kfree(holder);
}

mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

int bdev_disk_changed(struct block_device *bdev, bool invalidate)
static void blkdev_flush_mapping(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
int ret = 0;

lockdep_assert_held(&bdev->bd_mutex);

if (!(disk->flags & GENHD_FL_UP))
return -ENXIO;

rescan:
if (bdev->bd_part_count)
return -EBUSY;
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
invalidate_bdev(bdev);
blk_drop_partitions(disk);

clear_bit(GD_NEED_PART_SCAN, &disk->state);

/*
* Historically we only set the capacity to zero for devices that
* support partitions (independ of actually having partitions created).
* Doing that is rather inconsistent, but changing it broke legacy
* udisks polling for legacy ide-cdrom devices. Use the crude check
* below to get the sane behavior for most device while not breaking
* userspace for this particular setup.
*/
if (invalidate) {
if (disk_part_scan_enabled(disk) ||
!(disk->flags & GENHD_FL_REMOVABLE))
set_capacity(disk, 0);
}

if (get_capacity(disk)) {
ret = blk_add_partitions(disk, bdev);
if (ret == -EAGAIN)
goto rescan;
} else if (invalidate) {
/*
* Tell userspace that the media / partition table may have
* changed.
*/
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
}

return ret;
kill_bdev(bdev);
bdev_write_inode(bdev);
}
/*
* Only exported for loop and dasd for historic reasons. Don't use in new
* code!
*/
EXPORT_SYMBOL_GPL(bdev_disk_changed);

/*
* bd_mutex locking:
*
* mutex_lock(part->bd_mutex)
* mutex_lock_nested(whole->bd_mutex, 1)
*/
static int __blkdev_get(struct block_device *bdev, fmode_t mode)
static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
int ret = 0;

if (!(disk->flags & GENHD_FL_UP))
return -ENXIO;
if (disk->fops->open) {
ret = disk->fops->open(bdev, mode);
if (ret) {
/* avoid ghost partitions on a removed medium */
if (ret == -ENOMEDIUM &&
test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, true);
return ret;
}
}

if (!bdev->bd_openers) {
if (!bdev_is_partition(bdev)) {
ret = 0;
if (disk->fops->open)
ret = disk->fops->open(bdev, mode);

if (!ret)
set_init_blocksize(bdev);

/*
* If the device is invalidated, rescan partition
* if open succeeded or failed with -ENOMEDIUM.
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);

if (ret)
return ret;
} else {
struct block_device *whole = bdgrab(disk->part0);

mutex_lock_nested(&whole->bd_mutex, 1);
ret = __blkdev_get(whole, mode);
if (ret) {
mutex_unlock(&whole->bd_mutex);
bdput(whole);
return ret;
}
whole->bd_part_count++;
mutex_unlock(&whole->bd_mutex);

if (!bdev_nr_sectors(bdev)) {
__blkdev_put(whole, mode, 1);
bdput(whole);
return -ENXIO;
}
set_init_blocksize(bdev);
}

set_init_blocksize(bdev);
if (bdev->bd_bdi == &noop_backing_dev_info)
bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
} else {
if (!bdev_is_partition(bdev)) {
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
return ret;
}
}
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
bdev->bd_openers++;
return 0;;
}

static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
{
if (!--bdev->bd_openers)
blkdev_flush_mapping(bdev);
if (bdev->bd_disk->fops->release)
bdev->bd_disk->fops->release(bdev->bd_disk, mode);
}

static int blkdev_get_part(struct block_device *part, fmode_t mode)
{
struct gendisk *disk = part->bd_disk;
struct block_device *whole;
int ret;

if (part->bd_openers)
goto done;

whole = bdgrab(disk->part0);
ret = blkdev_get_whole(whole, mode);
if (ret)
goto out_put_whole;

ret = -ENXIO;
if (!bdev_nr_sectors(part))
goto out_blkdev_put;

disk->open_partitions++;
set_init_blocksize(part);
if (part->bd_bdi == &noop_backing_dev_info)
part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
done:
part->bd_openers++;
return 0;

out_blkdev_put:
blkdev_put_whole(whole, mode);
out_put_whole:
bdput(whole);
return ret;
}

static void blkdev_put_part(struct block_device *part, fmode_t mode)
{
struct block_device *whole = bdev_whole(part);

if (--part->bd_openers)
return;
blkdev_flush_mapping(part);
whole->bd_disk->open_partitions--;
blkdev_put_whole(whole, mode);
bdput(whole);
}

struct block_device *blkdev_get_no_open(dev_t dev)
@@ -1447,8 +1405,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)

disk_block_events(disk);

mutex_lock(&bdev->bd_mutex);
ret =__blkdev_get(bdev, mode);
mutex_lock(&disk->open_mutex);
ret = -ENXIO;
if (!(disk->flags & GENHD_FL_UP))
goto abort_claiming;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
else
ret = blkdev_get_whole(bdev, mode);
if (ret)
goto abort_claiming;
if (mode & FMODE_EXCL) {
@@ -1467,7 +1431,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
unblock_events = false;
}
}
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&disk->open_mutex);

if (unblock_events)
disk_unblock_events(disk);
@@ -1476,7 +1440,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
abort_claiming:
if (mode & FMODE_EXCL)
bd_abort_claiming(bdev, holder);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&disk->open_mutex);
disk_unblock_events(disk);
put_blkdev:
blkdev_put_no_open(bdev);
@@ -1551,10 +1515,9 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return 0;
}

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;

/*
* Sync early if it looks like we're the last one. If someone else
@@ -1566,41 +1529,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_openers == 1)
sync_blockdev(bdev);

mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;

if (!--bdev->bd_openers) {
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
bdev_write_inode(bdev);
if (bdev_is_partition(bdev))
victim = bdev_whole(bdev);
}

if (!bdev_is_partition(bdev) && disk->fops->release)
disk->fops->release(disk, mode);
mutex_unlock(&bdev->bd_mutex);
if (victim) {
__blkdev_put(victim, mode, 1);
bdput(victim);
}
}

void blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;

mutex_lock(&bdev->bd_mutex);

mutex_lock(&disk->open_mutex);
if (mode & FMODE_EXCL) {
struct block_device *whole = bdev_whole(bdev);
bool bdev_free;

/*
* Release a claim on the device. The holder fields
* are protected with bdev_lock. bd_mutex is to
* are protected with bdev_lock. open_mutex is to
* synchronize disk_holder unlinking.
*/
spin_lock(&bdev_lock);
@@ -1631,9 +1567,13 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* from userland - e.g. eject(1).
*/
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
mutex_unlock(&bdev->bd_mutex);

__blkdev_put(bdev, mode, 0);
if (bdev_is_partition(bdev))
blkdev_put_part(bdev, mode);
else
blkdev_put_whole(bdev, mode);
mutex_unlock(&disk->open_mutex);

blkdev_put_no_open(bdev);
}
EXPORT_SYMBOL(blkdev_put);
@@ -1941,10 +1881,10 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
old_inode = inode;
bdev = I_BDEV(inode);

mutex_lock(&bdev->bd_mutex);
mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers)
func(bdev, arg);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdev->bd_disk->open_mutex);

spin_lock(&blockdev_superblock->s_inode_list_lock);
}
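The fs/block_dev.c rework above splits the open path into blkdev_get_whole() and blkdev_get_part() and serialises opens on the per-disk open_mutex instead of the old per-bdev bd_mutex. The exported blkdev_get_by_*()/blkdev_put() interface is unchanged, so a kernel-side user still looks roughly like this (the path, holder token and chosen mode are illustrative)::

    static int quux_open_backing_dev(const char *path, void *holder,
                                     struct block_device **ret_bdev)
    {
        struct block_device *bdev;

        bdev = blkdev_get_by_path(path,
                                  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
                                  holder);
        if (IS_ERR(bdev))
            return PTR_ERR(bdev);

        *ret_bdev = bdev;
        return 0;
    }

    static void quux_close_backing_dev(struct block_device *bdev)
    {
        /* the mode must match the one passed to blkdev_get_by_path() */
        blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
    }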
@@ -1247,7 +1247,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
lockdep_assert_held(&uuid_mutex);
/*
* The device_list_mutex cannot be taken here in case opening the
* underlying device takes further locks like bd_mutex.
* underlying device takes further locks like open_mutex.
*
* We also don't need the lock here as this is called during mount and
* exclusion is provided by uuid_mutex
@@ -2343,28 +2343,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,

return ret;
}

static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
struct dentry *dentry;
const char *name = "?";

dentry = d_find_alias(inode);
if (dentry) {
spin_lock(&dentry->d_lock);
name = (const char *) dentry->d_name.name;
}
printk(KERN_DEBUG
"%s(%d): dirtied inode %lu (%s) on %s\n",
current->comm, task_pid_nr(current), inode->i_ino,
name, inode->i_sb->s_id);
if (dentry) {
spin_unlock(&dentry->d_lock);
dput(dentry);
}
}
}

/**
* __mark_inode_dirty - internal function to mark an inode dirty
*
@@ -2434,9 +2412,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;

if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);

spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
@@ -1277,9 +1277,9 @@ int get_tree_bdev(struct fs_context *fc,
}

/*
* s_umount nests inside bd_mutex during
* s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
* bd_mutex and can't be called under s_umount. Drop
* open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
@@ -1352,9 +1352,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
}

/*
* s_umount nests inside bd_mutex during
* s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
* bd_mutex and can't be called under s_umount. Drop
* open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
@@ -44,9 +44,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs)
#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter)
#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter)

#define bio_multiple_segments(bio) \
((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)

#define bvec_iter_sectors(iter) ((iter).bi_size >> 9)
#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter)))

@@ -271,7 +268,7 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit)

static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
{
*bv = bio_iovec(bio);
*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
}

static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
@@ -279,10 +276,9 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
struct bvec_iter iter = bio->bi_iter;
int idx;

if (unlikely(!bio_multiple_segments(bio))) {
*bv = bio_iovec(bio);
return;
}
bio_get_first_bvec(bio, bv);
if (bv->bv_len == bio->bi_iter.bi_size)
return; /* this bio only has a single bvec */

bio_advance_iter(bio, &iter, iter.bi_size);
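The bio.h hunk drops the bio_multiple_segments() macro; bio_get_last_bvec() now detects the single-bvec case by comparing the first bvec's length against bi_iter.bi_size. The same check written as a stand-alone helper, purely for illustration (the helper name is hypothetical)::

    static inline bool bio_spans_single_bvec(struct bio *bio)
    {
        struct bio_vec bv;

        bio_get_first_bvec(bio, &bv);
        /* a first bvec covering the whole iter means there is only one */
        return bv.bv_len == bio->bi_iter.bi_size;
    }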
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user