Merge: gfs2: No more self recovery

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7136 JIRA: https://issues.redhat.com/browse/RHEL-103348 When a node withdraws and it turns out that it is the only node that has the filesystem mounted, gfs2 currently tries to replay the local journal to bring the filesystem back into a consistent state. Not only is that a very bad idea, it has also never worked because gfs2_recover_func() will refuse to do anything during a withdraw. However, before even getting to this point, gfs2_recover_func() dereferences sdp->sd_jdesc->jd_inode. This was a use-after-free before commit 04133b607a78 ("gfs2: Prevent double iput for journal on error") and is a NULL pointer dereference since then. Simply get rid of self recovery to fix that. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Approved-by: Abhi Das <adas@redhat.com> Approved-by: Paul Evans <pevans@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Jarod Wilson <jarod@redhat.com>
2025-08-18 07:20:29 -07:00 · 2025-08-18 07:20:29 -07:00 · 506636b355
parent 5dbb1536d2 c2adcfc34c
commit 506636b355
1 changed files with 26 additions and 35 deletions
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@ -224,32 +224,23 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
 	 */
 	ret = gfs2_glock_nq(&sdp->sd_live_gh);

-	/*
-	 * If we actually got the "live" lock in EX mode, there are no other
-	 * nodes available to replay our journal. So we try to replay it
-	 * ourselves. We hold the "live" glock to prevent other mounters
-	 * during recovery, then just dequeue it and reacquire it in our
-	 * normal SH mode. Just in case the problem that caused us to
-	 * withdraw prevents us from recovering our journal (e.g. io errors
-	 * and such) we still check if the journal is clean before proceeding
-	 * but we may wait forever until another mounter does the recovery.
-	 */
-	if (ret == 0) {
-		fs_warn(sdp, "No other mounters found. Trying to recover our "
-			"own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
-		if (gfs2_recover_journal(sdp->sd_jdesc, 1))
-			fs_warn(sdp, "Unable to recover our journal jid %d.\n",
-				sdp->sd_lockstruct.ls_jid);
-		gfs2_glock_dq_wait(&sdp->sd_live_gh);
-		gfs2_holder_reinit(LM_ST_SHARED,
-				   LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
-				   &sdp->sd_live_gh);
-		gfs2_glock_nq(&sdp->sd_live_gh);
-	}
-
 	gfs2_glock_put(live_gl); /* drop extra reference we acquired */
 	clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);

+	/*
+	 * If we actually got the "live" lock in EX mode, there are no other
+	 * nodes available to replay our journal.
+	 */
+	if (ret == 0) {
+		fs_warn(sdp, "No other mounters found.\n");
+		/*
+		 * We are about to release the lockspace.  By keeping live_gl
+		 * locked here, we ensure that the next mounter coming along
+		 * will be a "first" mounter which will perform recovery.
+		 */
+		goto skip_recovery;
+	}
+
 	/*
 	 * At this point our journal is evicted, so we need to get a new inode
 	 * for it. Once done, we need to call gfs2_find_jhead which
@ -315,19 +306,19 @@ int gfs2_withdraw(struct gfs2_sbd *sdp)
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	const struct lm_lockops *lm = ls->ls_ops;

-	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
-	    test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
-		if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
-			return -1;
+	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
+		unsigned long old = READ_ONCE(sdp->sd_flags), new;

-		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
+		do {
+			if (old & BIT(SDF_WITHDRAWN)) {
+				wait_on_bit(&sdp->sd_flags,
+					    SDF_WITHDRAW_IN_PROG,
 					    TASK_UNINTERRUPTIBLE);
 				return -1;
 			}
+			new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG);
+		} while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new)));

-	set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
-
-	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 		fs_err(sdp, "about to withdraw this file system\n");
 		BUG_ON(sdp->sd_args.ar_debug);