diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java b/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java index 361a93489ca..f20ff7ae217 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java @@ -488,12 +488,13 @@ protected HAGroupStoreRecord getHAGroupRecord() throws IOException { /** * Determines whether failover should be triggered based on completion criteria. Failover is safe * to trigger when all of the following conditions are met: 1. A failover has been requested - * (failoverPending is true) 2. No files are currently in the in-progress directory 3. No new - * files exist from the next round to process up to the current timestamp round. The third + * (failoverPending is true) 2. The replication replay state is SYNC (not SYNCED_RECOVERY, + * DEGRADED, or NOT_INITIALIZED) 3. No files are currently in the in-progress directory 4. No new + * files exist from the next round to process up to the current timestamp round. The fourth * condition checks for new files in the range from nextRoundToProcess (derived from * getLastRoundProcessed()) to currentTimestampRound (derived from current time). This ensures all - * replication logs up to the current time have been processed before transitioning the cluster - * from STANDBY to ACTIVE state. + * replication logs up to the current time have been processed and any pending rewind has + * completed before transitioning the cluster from STANDBY to ACTIVE state. * @return true if all conditions are met and failover should be triggered, false otherwise * @throws IOException if there's an error checking file status */ @@ -504,6 +505,14 @@ protected boolean shouldTriggerFailover() throws IOException { LOG.debug("Failover not triggered. failoverPending is false."); return false; } + // Check if replay state is SYNC; block failover during SYNCED_RECOVERY (rewind pending), + // DEGRADED, or NOT_INITIALIZED to prevent bypassing the rewind logic + if (replicationReplayState.get() != ReplicationReplayState.SYNC) { + LOG.debug("Failover not triggered. Replay state is {}, not SYNC.", + replicationReplayState.get()); + return false; + } + // Check if in-progress directory is empty boolean isInProgressDirectoryEmpty = replicationLogTracker.getInProgressFiles().isEmpty(); if (!isInProgressDirectoryEmpty) { diff --git a/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java b/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java index 2b162bea0e4..8d7568f4227 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java @@ -23,6 +23,8 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.io.IOException; @@ -1922,6 +1924,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertTrue("Should trigger failover when all conditions are met", discovery.shouldTriggerFailover()); @@ -1953,6 +1957,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse("Should not trigger failover when in-progress files are not empty", discovery.shouldTriggerFailover()); @@ -1969,6 +1975,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse( "Should not trigger failover when new files exist from next round to current timestamp round", @@ -2018,6 +2026,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse("Should not trigger failover when both in-progress and new files exist", discovery.shouldTriggerFailover()); @@ -2039,6 +2049,61 @@ public void testShouldTriggerFailover() throws IOException { discovery.shouldTriggerFailover()); } + // Test Case 9: SYNCED_RECOVERY state with all other conditions met - should return false + // Also verifies short-circuit: no file I/O should occur when state is not SYNC + { + Mockito.clearInvocations(tracker); + TestableReplicationLogDiscoveryReplay discovery = + new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); + discovery.setLastRoundInSync(testRound); + discovery.setLastRoundProcessed(testRound); + discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNCED_RECOVERY); + + assertFalse( + "Should not trigger failover when replay state is SYNCED_RECOVERY (rewind pending)", + discovery.shouldTriggerFailover()); + verify(tracker, never()).getInProgressFiles(); + verify(tracker, never()).getNewFiles(nextRoundToProcess, currentTimestampRound); + } + + // Test Case 10: DEGRADED state with all other conditions met - should return false + // Also verifies short-circuit: no file I/O should occur when state is not SYNC + { + Mockito.clearInvocations(tracker); + TestableReplicationLogDiscoveryReplay discovery = + new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); + discovery.setLastRoundInSync(testRound); + discovery.setLastRoundProcessed(testRound); + discovery.setFailoverPending(true); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.DEGRADED); + + assertFalse("Should not trigger failover when replay state is DEGRADED", + discovery.shouldTriggerFailover()); + verify(tracker, never()).getInProgressFiles(); + verify(tracker, never()).getNewFiles(nextRoundToProcess, currentTimestampRound); + } + + // Test Case 11: NOT_INITIALIZED state with all other conditions met - should return false + // Also verifies short-circuit: no file I/O should occur when state is not SYNC + { + Mockito.clearInvocations(tracker); + TestableReplicationLogDiscoveryReplay discovery = + new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); + discovery.setLastRoundInSync(testRound); + discovery.setLastRoundProcessed(testRound); + discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.NOT_INITIALIZED); + + assertFalse("Should not trigger failover when replay state is NOT_INITIALIZED", + discovery.shouldTriggerFailover()); + verify(tracker, never()).getInProgressFiles(); + verify(tracker, never()).getNewFiles(nextRoundToProcess, currentTimestampRound); + } + } finally { EnvironmentEdgeManager.reset(); }