md/raid10: avoid deadlock on recovery.

When a disk failure happens and the array has a spare drive, the resync thread kicks in and starts to refill the spare. However, it may get blocked by a retry thread that resubmits failed IO to a mirror, and that retry thread can itself get blocked on a barrier raised by the resync thread, so the two threads deadlock.

Acked-by: Nigel Croxon <ncroxon@redhat.com>
Signed-off-by: Vitaly Mayatskikh <vmayatskikh@digitalocean.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
2025-03-06 20:59:54 +01:00 · 2020-03-03 13:14:40 -05:00 · 2020-03-03 13:14:40 -05:00 · fe630de009
commit fe630de009
parent c333f9495c
1 changed files with 11 additions and 3 deletions
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -980,6 +980,7 @@ static void wait_barrier(struct r10conf *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
+		struct bio_list *bio_list = current->bio_list;
 		conf->nr_waiting++;
 		/* Wait for the barrier to drop.
 		 * However if there are already pending
@@ -994,9 +995,16 @@ static void wait_barrier(struct r10conf *conf)
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->barrier ||
 				    (atomic_read(&conf->nr_pending) &&
-				     current->bio_list &&
+				     bio_list &&
-				     (!bio_list_empty(&current->bio_list[0]) ||
+				     (!bio_list_empty(&bio_list[0]) ||
-				      !bio_list_empty(&current->bio_list[1]))),
+				      !bio_list_empty(&bio_list[1]))) ||
+				     /* move on if recovery thread is
+				      * blocked by us
+				      */
+				     (conf->mddev->thread->tsk == current &&
+				      test_bit(MD_RECOVERY_RUNNING,
+					       &conf->mddev->recovery) &&
+				      conf->nr_queued > 0),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 		if (!conf->nr_waiting)