loop: scale loop device by introducing a per-device lock
Currently, the loop driver has only one global lock: loop_ctl_mutex. This lock becomes hot in scenarios where many loop devices are used. Scale it by introducing a per-device lock, lo_mutex, that protects modifications of all fields in struct loop_device. Keep loop_ctl_mutex to protect the global state: loop_index_idr and the loop_lookup() and loop_add() paths. The new lock ordering requirement is that loop_ctl_mutex must be taken before lo_mutex. Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com> Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com> Reviewed-by: Petr Vorel <pvorel@suse.cz> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
767630c63b
commit
6cc8e74308
2 changed files with 54 additions and 40 deletions
|
@ -704,7 +704,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||||
int error;
|
int error;
|
||||||
bool partscan;
|
bool partscan;
|
||||||
|
|
||||||
error = mutex_lock_killable(&loop_ctl_mutex);
|
error = mutex_lock_killable(&lo->lo_mutex);
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
error = -ENXIO;
|
error = -ENXIO;
|
||||||
|
@ -743,9 +743,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||||
loop_update_dio(lo);
|
loop_update_dio(lo);
|
||||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||||
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
|
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
/*
|
/*
|
||||||
* We must drop file reference outside of loop_ctl_mutex as dropping
|
* We must drop file reference outside of lo_mutex as dropping
|
||||||
* the file ref can take bd_mutex which creates circular locking
|
* the file ref can take bd_mutex which creates circular locking
|
||||||
* dependency.
|
* dependency.
|
||||||
*/
|
*/
|
||||||
|
@ -755,7 +755,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
out_err:
|
out_err:
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
if (file)
|
if (file)
|
||||||
fput(file);
|
fput(file);
|
||||||
return error;
|
return error;
|
||||||
|
@ -1092,7 +1092,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||||
goto out_putf;
|
goto out_putf;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = mutex_lock_killable(&loop_ctl_mutex);
|
error = mutex_lock_killable(&lo->lo_mutex);
|
||||||
if (error)
|
if (error)
|
||||||
goto out_bdev;
|
goto out_bdev;
|
||||||
|
|
||||||
|
@ -1171,7 +1171,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||||
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
|
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
|
||||||
*/
|
*/
|
||||||
bdgrab(bdev);
|
bdgrab(bdev);
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
if (partscan)
|
if (partscan)
|
||||||
loop_reread_partitions(lo, bdev);
|
loop_reread_partitions(lo, bdev);
|
||||||
if (!(mode & FMODE_EXCL))
|
if (!(mode & FMODE_EXCL))
|
||||||
|
@ -1179,7 +1179,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
out_bdev:
|
out_bdev:
|
||||||
if (!(mode & FMODE_EXCL))
|
if (!(mode & FMODE_EXCL))
|
||||||
bd_abort_claiming(bdev, loop_configure);
|
bd_abort_claiming(bdev, loop_configure);
|
||||||
|
@ -1200,7 +1200,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
|
||||||
bool partscan = false;
|
bool partscan = false;
|
||||||
int lo_number;
|
int lo_number;
|
||||||
|
|
||||||
mutex_lock(&loop_ctl_mutex);
|
mutex_lock(&lo->lo_mutex);
|
||||||
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
|
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
|
||||||
err = -ENXIO;
|
err = -ENXIO;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
@ -1253,7 +1253,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
|
||||||
lo_number = lo->lo_number;
|
lo_number = lo->lo_number;
|
||||||
loop_unprepare_queue(lo);
|
loop_unprepare_queue(lo);
|
||||||
out_unlock:
|
out_unlock:
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
if (partscan) {
|
if (partscan) {
|
||||||
/*
|
/*
|
||||||
* bd_mutex has been held already in release path, so don't
|
* bd_mutex has been held already in release path, so don't
|
||||||
|
@ -1284,18 +1284,17 @@ out_unlock:
|
||||||
* protects us from all the other places trying to change the 'lo'
|
* protects us from all the other places trying to change the 'lo'
|
||||||
* device.
|
* device.
|
||||||
*/
|
*/
|
||||||
mutex_lock(&loop_ctl_mutex);
|
mutex_lock(&lo->lo_mutex);
|
||||||
lo->lo_flags = 0;
|
lo->lo_flags = 0;
|
||||||
if (!part_shift)
|
if (!part_shift)
|
||||||
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
|
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
|
||||||
lo->lo_state = Lo_unbound;
|
lo->lo_state = Lo_unbound;
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Need not hold loop_ctl_mutex to fput backing file.
|
* Need not hold lo_mutex to fput backing file. Calling fput holding
|
||||||
* Calling fput holding loop_ctl_mutex triggers a circular
|
* lo_mutex triggers a circular lock dependency possibility warning as
|
||||||
* lock dependency possibility warning as fput can take
|
* fput can take bd_mutex which is usually taken before lo_mutex.
|
||||||
* bd_mutex which is usually taken before loop_ctl_mutex.
|
|
||||||
*/
|
*/
|
||||||
if (filp)
|
if (filp)
|
||||||
fput(filp);
|
fput(filp);
|
||||||
|
@ -1306,11 +1305,11 @@ static int loop_clr_fd(struct loop_device *lo)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
err = mutex_lock_killable(&loop_ctl_mutex);
|
err = mutex_lock_killable(&lo->lo_mutex);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
if (lo->lo_state != Lo_bound) {
|
if (lo->lo_state != Lo_bound) {
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
@ -1325,11 +1324,11 @@ static int loop_clr_fd(struct loop_device *lo)
|
||||||
*/
|
*/
|
||||||
if (atomic_read(&lo->lo_refcnt) > 1) {
|
if (atomic_read(&lo->lo_refcnt) > 1) {
|
||||||
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
|
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
lo->lo_state = Lo_rundown;
|
lo->lo_state = Lo_rundown;
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
|
|
||||||
return __loop_clr_fd(lo, false);
|
return __loop_clr_fd(lo, false);
|
||||||
}
|
}
|
||||||
|
@ -1344,7 +1343,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
|
||||||
bool partscan = false;
|
bool partscan = false;
|
||||||
bool size_changed = false;
|
bool size_changed = false;
|
||||||
|
|
||||||
err = mutex_lock_killable(&loop_ctl_mutex);
|
err = mutex_lock_killable(&lo->lo_mutex);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
if (lo->lo_encrypt_key_size &&
|
if (lo->lo_encrypt_key_size &&
|
||||||
|
@ -1411,7 +1410,7 @@ out_unfreeze:
|
||||||
partscan = true;
|
partscan = true;
|
||||||
}
|
}
|
||||||
out_unlock:
|
out_unlock:
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
if (partscan)
|
if (partscan)
|
||||||
loop_reread_partitions(lo, bdev);
|
loop_reread_partitions(lo, bdev);
|
||||||
|
|
||||||
|
@ -1425,11 +1424,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
|
||||||
struct kstat stat;
|
struct kstat stat;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = mutex_lock_killable(&loop_ctl_mutex);
|
ret = mutex_lock_killable(&lo->lo_mutex);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
if (lo->lo_state != Lo_bound) {
|
if (lo->lo_state != Lo_bound) {
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1448,10 +1447,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
|
||||||
lo->lo_encrypt_key_size);
|
lo->lo_encrypt_key_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Drop loop_ctl_mutex while we call into the filesystem. */
|
/* Drop lo_mutex while we call into the filesystem. */
|
||||||
path = lo->lo_backing_file->f_path;
|
path = lo->lo_backing_file->f_path;
|
||||||
path_get(&path);
|
path_get(&path);
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
|
ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
info->lo_device = huge_encode_dev(stat.dev);
|
info->lo_device = huge_encode_dev(stat.dev);
|
||||||
|
@ -1637,7 +1636,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
err = mutex_lock_killable(&loop_ctl_mutex);
|
err = mutex_lock_killable(&lo->lo_mutex);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
|
@ -1653,7 +1652,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
|
||||||
default:
|
default:
|
||||||
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
|
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
|
||||||
}
|
}
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1879,27 +1878,33 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
|
||||||
struct loop_device *lo;
|
struct loop_device *lo;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* take loop_ctl_mutex to protect lo pointer from race with
|
||||||
|
* loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention
|
||||||
|
* release it prior to updating lo->lo_refcnt.
|
||||||
|
*/
|
||||||
err = mutex_lock_killable(&loop_ctl_mutex);
|
err = mutex_lock_killable(&loop_ctl_mutex);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
lo = bdev->bd_disk->private_data;
|
lo = bdev->bd_disk->private_data;
|
||||||
if (!lo) {
|
if (!lo) {
|
||||||
err = -ENXIO;
|
mutex_unlock(&loop_ctl_mutex);
|
||||||
goto out;
|
return -ENXIO;
|
||||||
}
|
}
|
||||||
|
err = mutex_lock_killable(&lo->lo_mutex);
|
||||||
atomic_inc(&lo->lo_refcnt);
|
|
||||||
out:
|
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&loop_ctl_mutex);
|
||||||
return err;
|
if (err)
|
||||||
|
return err;
|
||||||
|
atomic_inc(&lo->lo_refcnt);
|
||||||
|
mutex_unlock(&lo->lo_mutex);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void lo_release(struct gendisk *disk, fmode_t mode)
|
static void lo_release(struct gendisk *disk, fmode_t mode)
|
||||||
{
|
{
|
||||||
struct loop_device *lo;
|
struct loop_device *lo = disk->private_data;
|
||||||
|
|
||||||
mutex_lock(&loop_ctl_mutex);
|
mutex_lock(&lo->lo_mutex);
|
||||||
lo = disk->private_data;
|
|
||||||
if (atomic_dec_return(&lo->lo_refcnt))
|
if (atomic_dec_return(&lo->lo_refcnt))
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
|
@ -1907,7 +1912,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
|
||||||
if (lo->lo_state != Lo_bound)
|
if (lo->lo_state != Lo_bound)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
lo->lo_state = Lo_rundown;
|
lo->lo_state = Lo_rundown;
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
/*
|
/*
|
||||||
* In autoclear mode, stop the loop thread
|
* In autoclear mode, stop the loop thread
|
||||||
* and remove configuration after last close.
|
* and remove configuration after last close.
|
||||||
|
@ -1924,7 +1929,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct block_device_operations lo_fops = {
|
static const struct block_device_operations lo_fops = {
|
||||||
|
@ -1963,10 +1968,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
|
||||||
struct loop_device *lo = ptr;
|
struct loop_device *lo = ptr;
|
||||||
struct loop_func_table *xfer = data;
|
struct loop_func_table *xfer = data;
|
||||||
|
|
||||||
mutex_lock(&loop_ctl_mutex);
|
mutex_lock(&lo->lo_mutex);
|
||||||
if (lo->lo_encryption == xfer)
|
if (lo->lo_encryption == xfer)
|
||||||
loop_release_xfer(lo);
|
loop_release_xfer(lo);
|
||||||
mutex_unlock(&loop_ctl_mutex);
|
mutex_unlock(&lo->lo_mutex);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2152,6 +2157,7 @@ static int loop_add(struct loop_device **l, int i)
|
||||||
disk->flags |= GENHD_FL_NO_PART_SCAN;
|
disk->flags |= GENHD_FL_NO_PART_SCAN;
|
||||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||||
atomic_set(&lo->lo_refcnt, 0);
|
atomic_set(&lo->lo_refcnt, 0);
|
||||||
|
mutex_init(&lo->lo_mutex);
|
||||||
lo->lo_number = i;
|
lo->lo_number = i;
|
||||||
spin_lock_init(&lo->lo_lock);
|
spin_lock_init(&lo->lo_lock);
|
||||||
disk->major = LOOP_MAJOR;
|
disk->major = LOOP_MAJOR;
|
||||||
|
@ -2182,6 +2188,7 @@ static void loop_remove(struct loop_device *lo)
|
||||||
blk_cleanup_queue(lo->lo_queue);
|
blk_cleanup_queue(lo->lo_queue);
|
||||||
blk_mq_free_tag_set(&lo->tag_set);
|
blk_mq_free_tag_set(&lo->tag_set);
|
||||||
put_disk(lo->lo_disk);
|
put_disk(lo->lo_disk);
|
||||||
|
mutex_destroy(&lo->lo_mutex);
|
||||||
kfree(lo);
|
kfree(lo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2261,15 +2268,21 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
|
||||||
ret = loop_lookup(&lo, parm);
|
ret = loop_lookup(&lo, parm);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
break;
|
break;
|
||||||
|
ret = mutex_lock_killable(&lo->lo_mutex);
|
||||||
|
if (ret)
|
||||||
|
break;
|
||||||
if (lo->lo_state != Lo_unbound) {
|
if (lo->lo_state != Lo_unbound) {
|
||||||
ret = -EBUSY;
|
ret = -EBUSY;
|
||||||
|
mutex_unlock(&lo->lo_mutex);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (atomic_read(&lo->lo_refcnt) > 0) {
|
if (atomic_read(&lo->lo_refcnt) > 0) {
|
||||||
ret = -EBUSY;
|
ret = -EBUSY;
|
||||||
|
mutex_unlock(&lo->lo_mutex);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
lo->lo_disk->private_data = NULL;
|
lo->lo_disk->private_data = NULL;
|
||||||
|
mutex_unlock(&lo->lo_mutex);
|
||||||
idr_remove(&loop_index_idr, lo->lo_number);
|
idr_remove(&loop_index_idr, lo->lo_number);
|
||||||
loop_remove(lo);
|
loop_remove(lo);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -62,6 +62,7 @@ struct loop_device {
|
||||||
struct request_queue *lo_queue;
|
struct request_queue *lo_queue;
|
||||||
struct blk_mq_tag_set tag_set;
|
struct blk_mq_tag_set tag_set;
|
||||||
struct gendisk *lo_disk;
|
struct gendisk *lo_disk;
|
||||||
|
struct mutex lo_mutex;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct loop_cmd {
|
struct loop_cmd {
|
||||||
|
|
Loading…
Add table
Reference in a new issue