md/raid1: remove rcu protection to access rdev from conf
Because it's safe to accees rdev from conf: - If any spinlock is held, because synchronize_rcu() from md_kick_rdev_from_array() will prevent 'rdev' to be freed until spinlock is released; - If 'reconfig_lock' is held, because rdev can't be added or removed from array; - If there is normal IO inflight, because mddev_suspend() will prevent rdev to be added or removed from array; - If there is sync IO inflight, because 'MD_RECOVERY_RUNNING' is checked in remove_and_add_spares(). And these will cover all the scenarios in raid1. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20231125081604.3939938-4-yukuai1@huaweicloud.com
This commit is contained in:
parent
a448af25be
commit
2d32777d60
1 changed files with 23 additions and 39 deletions
|
@ -609,7 +609,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||||
int choose_first;
|
int choose_first;
|
||||||
int choose_next_idle;
|
int choose_next_idle;
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
/*
|
/*
|
||||||
* Check if we can balance. We can balance on the whole
|
* Check if we can balance. We can balance on the whole
|
||||||
* device if no resync is going on, or below the resync window.
|
* device if no resync is going on, or below the resync window.
|
||||||
|
@ -642,7 +641,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||||
unsigned int pending;
|
unsigned int pending;
|
||||||
bool nonrot;
|
bool nonrot;
|
||||||
|
|
||||||
rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
rdev = conf->mirrors[disk].rdev;
|
||||||
if (r1_bio->bios[disk] == IO_BLOCKED
|
if (r1_bio->bios[disk] == IO_BLOCKED
|
||||||
|| rdev == NULL
|
|| rdev == NULL
|
||||||
|| test_bit(Faulty, &rdev->flags))
|
|| test_bit(Faulty, &rdev->flags))
|
||||||
|
@ -773,7 +772,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||||
}
|
}
|
||||||
|
|
||||||
if (best_disk >= 0) {
|
if (best_disk >= 0) {
|
||||||
rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
|
rdev = conf->mirrors[best_disk].rdev;
|
||||||
if (!rdev)
|
if (!rdev)
|
||||||
goto retry;
|
goto retry;
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
|
@ -784,7 +783,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||||
|
|
||||||
conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
|
conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
*max_sectors = sectors;
|
*max_sectors = sectors;
|
||||||
|
|
||||||
return best_disk;
|
return best_disk;
|
||||||
|
@ -1235,14 +1233,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||||
|
|
||||||
if (r1bio_existed) {
|
if (r1bio_existed) {
|
||||||
/* Need to get the block device name carefully */
|
/* Need to get the block device name carefully */
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
|
||||||
rcu_read_lock();
|
|
||||||
rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
|
|
||||||
if (rdev)
|
if (rdev)
|
||||||
snprintf(b, sizeof(b), "%pg", rdev->bdev);
|
snprintf(b, sizeof(b), "%pg", rdev->bdev);
|
||||||
else
|
else
|
||||||
strcpy(b, "???");
|
strcpy(b, "???");
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1396,10 +1392,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||||
|
|
||||||
disks = conf->raid_disks * 2;
|
disks = conf->raid_disks * 2;
|
||||||
blocked_rdev = NULL;
|
blocked_rdev = NULL;
|
||||||
rcu_read_lock();
|
|
||||||
max_sectors = r1_bio->sectors;
|
max_sectors = r1_bio->sectors;
|
||||||
for (i = 0; i < disks; i++) {
|
for (i = 0; i < disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The write-behind io is only attempted on drives marked as
|
* The write-behind io is only attempted on drives marked as
|
||||||
|
@ -1465,7 +1460,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||||
}
|
}
|
||||||
r1_bio->bios[i] = bio;
|
r1_bio->bios[i] = bio;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
if (unlikely(blocked_rdev)) {
|
if (unlikely(blocked_rdev)) {
|
||||||
/* Wait for this device to become unblocked */
|
/* Wait for this device to become unblocked */
|
||||||
|
@ -1617,15 +1611,16 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
|
||||||
struct r1conf *conf = mddev->private;
|
struct r1conf *conf = mddev->private;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
lockdep_assert_held(&mddev->lock);
|
||||||
|
|
||||||
seq_printf(seq, " [%d/%d] [", conf->raid_disks,
|
seq_printf(seq, " [%d/%d] [", conf->raid_disks,
|
||||||
conf->raid_disks - mddev->degraded);
|
conf->raid_disks - mddev->degraded);
|
||||||
rcu_read_lock();
|
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
|
||||||
|
|
||||||
seq_printf(seq, "%s",
|
seq_printf(seq, "%s",
|
||||||
rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
|
rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
seq_printf(seq, "]");
|
seq_printf(seq, "]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1691,16 +1686,15 @@ static void print_conf(struct r1conf *conf)
|
||||||
pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
|
pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
|
||||||
conf->raid_disks);
|
conf->raid_disks);
|
||||||
|
|
||||||
rcu_read_lock();
|
lockdep_assert_held(&conf->mddev->reconfig_mutex);
|
||||||
for (i = 0; i < conf->raid_disks; i++) {
|
for (i = 0; i < conf->raid_disks; i++) {
|
||||||
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
|
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||||
if (rdev)
|
if (rdev)
|
||||||
pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
|
pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
|
||||||
i, !test_bit(In_sync, &rdev->flags),
|
i, !test_bit(In_sync, &rdev->flags),
|
||||||
!test_bit(Faulty, &rdev->flags),
|
!test_bit(Faulty, &rdev->flags),
|
||||||
rdev->bdev);
|
rdev->bdev);
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void close_sync(struct r1conf *conf)
|
static void close_sync(struct r1conf *conf)
|
||||||
|
@ -1810,7 +1804,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
*/
|
*/
|
||||||
if (rdev->saved_raid_disk < 0)
|
if (rdev->saved_raid_disk < 0)
|
||||||
conf->fullsync = 1;
|
conf->fullsync = 1;
|
||||||
rcu_assign_pointer(p->rdev, rdev);
|
WRITE_ONCE(p->rdev, rdev);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||||
|
@ -1826,7 +1820,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
rdev->raid_disk = repl_slot;
|
rdev->raid_disk = repl_slot;
|
||||||
err = 0;
|
err = 0;
|
||||||
conf->fullsync = 1;
|
conf->fullsync = 1;
|
||||||
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
|
WRITE_ONCE(p[conf->raid_disks].rdev, rdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
print_conf(conf);
|
print_conf(conf);
|
||||||
|
@ -1862,7 +1856,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
p->rdev = NULL;
|
WRITE_ONCE(p->rdev, NULL);
|
||||||
if (conf->mirrors[conf->raid_disks + number].rdev) {
|
if (conf->mirrors[conf->raid_disks + number].rdev) {
|
||||||
/* We just removed a device that is being replaced.
|
/* We just removed a device that is being replaced.
|
||||||
* Move down the replacement. We drain all IO before
|
* Move down the replacement. We drain all IO before
|
||||||
|
@ -1883,7 +1877,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
clear_bit(Replacement, &repl->flags);
|
clear_bit(Replacement, &repl->flags);
|
||||||
p->rdev = repl;
|
WRITE_ONCE(p->rdev, repl);
|
||||||
conf->mirrors[conf->raid_disks + number].rdev = NULL;
|
conf->mirrors[conf->raid_disks + number].rdev = NULL;
|
||||||
unfreeze_array(conf);
|
unfreeze_array(conf);
|
||||||
}
|
}
|
||||||
|
@ -2281,8 +2275,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||||
sector_t first_bad;
|
sector_t first_bad;
|
||||||
int bad_sectors;
|
int bad_sectors;
|
||||||
|
|
||||||
rcu_read_lock();
|
rdev = conf->mirrors[d].rdev;
|
||||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
|
||||||
if (rdev &&
|
if (rdev &&
|
||||||
(test_bit(In_sync, &rdev->flags) ||
|
(test_bit(In_sync, &rdev->flags) ||
|
||||||
(!test_bit(Faulty, &rdev->flags) &&
|
(!test_bit(Faulty, &rdev->flags) &&
|
||||||
|
@ -2290,15 +2283,14 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||||
is_badblock(rdev, sect, s,
|
is_badblock(rdev, sect, s,
|
||||||
&first_bad, &bad_sectors) == 0) {
|
&first_bad, &bad_sectors) == 0) {
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
rcu_read_unlock();
|
|
||||||
if (sync_page_io(rdev, sect, s<<9,
|
if (sync_page_io(rdev, sect, s<<9,
|
||||||
conf->tmppage, REQ_OP_READ, false))
|
conf->tmppage, REQ_OP_READ, false))
|
||||||
success = 1;
|
success = 1;
|
||||||
rdev_dec_pending(rdev, mddev);
|
rdev_dec_pending(rdev, mddev);
|
||||||
if (success)
|
if (success)
|
||||||
break;
|
break;
|
||||||
} else
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
d++;
|
d++;
|
||||||
if (d == conf->raid_disks * 2)
|
if (d == conf->raid_disks * 2)
|
||||||
d = 0;
|
d = 0;
|
||||||
|
@ -2317,29 +2309,24 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||||
if (d==0)
|
if (d==0)
|
||||||
d = conf->raid_disks * 2;
|
d = conf->raid_disks * 2;
|
||||||
d--;
|
d--;
|
||||||
rcu_read_lock();
|
rdev = conf->mirrors[d].rdev;
|
||||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
|
||||||
if (rdev &&
|
if (rdev &&
|
||||||
!test_bit(Faulty, &rdev->flags)) {
|
!test_bit(Faulty, &rdev->flags)) {
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
rcu_read_unlock();
|
|
||||||
r1_sync_page_io(rdev, sect, s,
|
r1_sync_page_io(rdev, sect, s,
|
||||||
conf->tmppage, WRITE);
|
conf->tmppage, WRITE);
|
||||||
rdev_dec_pending(rdev, mddev);
|
rdev_dec_pending(rdev, mddev);
|
||||||
} else
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
d = start;
|
d = start;
|
||||||
while (d != read_disk) {
|
while (d != read_disk) {
|
||||||
if (d==0)
|
if (d==0)
|
||||||
d = conf->raid_disks * 2;
|
d = conf->raid_disks * 2;
|
||||||
d--;
|
d--;
|
||||||
rcu_read_lock();
|
rdev = conf->mirrors[d].rdev;
|
||||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
|
||||||
if (rdev &&
|
if (rdev &&
|
||||||
!test_bit(Faulty, &rdev->flags)) {
|
!test_bit(Faulty, &rdev->flags)) {
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
rcu_read_unlock();
|
|
||||||
if (r1_sync_page_io(rdev, sect, s,
|
if (r1_sync_page_io(rdev, sect, s,
|
||||||
conf->tmppage, READ)) {
|
conf->tmppage, READ)) {
|
||||||
atomic_add(s, &rdev->corrected_errors);
|
atomic_add(s, &rdev->corrected_errors);
|
||||||
|
@ -2350,8 +2337,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
|
||||||
rdev->bdev);
|
rdev->bdev);
|
||||||
}
|
}
|
||||||
rdev_dec_pending(rdev, mddev);
|
rdev_dec_pending(rdev, mddev);
|
||||||
} else
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
}
|
||||||
sectors -= s;
|
sectors -= s;
|
||||||
sect += s;
|
sect += s;
|
||||||
|
@ -2732,7 +2718,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||||
|
|
||||||
r1_bio = raid1_alloc_init_r1buf(conf);
|
r1_bio = raid1_alloc_init_r1buf(conf);
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
/*
|
/*
|
||||||
* If we get a correctably read error during resync or recovery,
|
* If we get a correctably read error during resync or recovery,
|
||||||
* we might want to read from a different device. So we
|
* we might want to read from a different device. So we
|
||||||
|
@ -2753,7 +2738,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
bio = r1_bio->bios[i];
|
bio = r1_bio->bios[i];
|
||||||
|
|
||||||
rdev = rcu_dereference(conf->mirrors[i].rdev);
|
rdev = conf->mirrors[i].rdev;
|
||||||
if (rdev == NULL ||
|
if (rdev == NULL ||
|
||||||
test_bit(Faulty, &rdev->flags)) {
|
test_bit(Faulty, &rdev->flags)) {
|
||||||
if (i < conf->raid_disks)
|
if (i < conf->raid_disks)
|
||||||
|
@ -2811,7 +2796,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||||
bio->bi_opf |= MD_FAILFAST;
|
bio->bi_opf |= MD_FAILFAST;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
|
||||||
if (disk < 0)
|
if (disk < 0)
|
||||||
disk = wonly;
|
disk = wonly;
|
||||||
r1_bio->read_disk = disk;
|
r1_bio->read_disk = disk;
|
||||||
|
|
Loading…
Add table
Reference in a new issue