From 79023d313f9402b27e03a0c7b8d7d946548960bf Mon Sep 17 00:00:00 2001 From: Himanshu Gwalani Date: Fri, 17 Apr 2026 20:05:44 +0530 Subject: [PATCH 1/2] Adding additional condtion for failover trigger --- .../reader/ReplicationLogDiscoveryReplay.java | 17 ++++-- .../ReplicationLogDiscoveryReplayTestIT.java | 60 +++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java b/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java index 361a93489ca..bf099dbb0f2 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java @@ -488,12 +488,13 @@ protected HAGroupStoreRecord getHAGroupRecord() throws IOException { /** * Determines whether failover should be triggered based on completion criteria. Failover is safe * to trigger when all of the following conditions are met: 1. A failover has been requested - * (failoverPending is true) 2. No files are currently in the in-progress directory 3. No new - * files exist from the next round to process up to the current timestamp round. The third + * (failoverPending is true) 2. The replication replay state is SYNC (not SYNCED_RECOVERY, + * DEGRADED, or NOT_INITIALIZED) 3. No files are currently in the in-progress directory 4. No new + * files exist from the next round to process up to the current timestamp round. The fourth * condition checks for new files in the range from nextRoundToProcess (derived from * getLastRoundProcessed()) to currentTimestampRound (derived from current time). This ensures all - * replication logs up to the current time have been processed before transitioning the cluster - * from STANDBY to ACTIVE state. + * replication logs up to the current time have been processed and any pending rewind has completed + * before transitioning the cluster from STANDBY to ACTIVE state. * @return true if all conditions are met and failover should be triggered, false otherwise * @throws IOException if there's an error checking file status */ @@ -504,6 +505,14 @@ protected boolean shouldTriggerFailover() throws IOException { LOG.debug("Failover not triggered. failoverPending is false."); return false; } + // Check if replay state is SYNC; block failover during SYNCED_RECOVERY (rewind pending), + // DEGRADED, or NOT_INITIALIZED to prevent bypassing the rewind logic + if (replicationReplayState.get() != ReplicationReplayState.SYNC) { + LOG.debug("Failover not triggered. Replay state is {}, not SYNC.", + replicationReplayState.get()); + return false; + } + // Check if in-progress directory is empty boolean isInProgressDirectoryEmpty = replicationLogTracker.getInProgressFiles().isEmpty(); if (!isInProgressDirectoryEmpty) { diff --git a/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java b/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java index 2b162bea0e4..883e96e57a1 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java @@ -1922,6 +1922,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertTrue("Should trigger failover when all conditions are met", discovery.shouldTriggerFailover()); @@ -1953,6 +1955,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse("Should not trigger failover when in-progress files are not empty", discovery.shouldTriggerFailover()); @@ -1969,6 +1973,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse( "Should not trigger failover when new files exist from next round to current timestamp round", @@ -2018,6 +2024,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse("Should not trigger failover when both in-progress and new files exist", discovery.shouldTriggerFailover()); @@ -2039,6 +2047,58 @@ public void testShouldTriggerFailover() throws IOException { discovery.shouldTriggerFailover()); } + // Test Case 9: SYNCED_RECOVERY state with all other conditions met - should return false + { + when(tracker.getInProgressFiles()).thenReturn(Collections.emptyList()); + when(tracker.getNewFiles(nextRoundToProcess, currentTimestampRound)) + .thenReturn(Collections.emptyList()); + TestableReplicationLogDiscoveryReplay discovery = + new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); + discovery.setLastRoundInSync(testRound); + discovery.setLastRoundProcessed(testRound); + discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNCED_RECOVERY); + + assertFalse( + "Should not trigger failover when replay state is SYNCED_RECOVERY (rewind pending)", + discovery.shouldTriggerFailover()); + } + + // Test Case 10: DEGRADED state with all other conditions met - should return false + { + when(tracker.getInProgressFiles()).thenReturn(Collections.emptyList()); + when(tracker.getNewFiles(nextRoundToProcess, currentTimestampRound)) + .thenReturn(Collections.emptyList()); + TestableReplicationLogDiscoveryReplay discovery = + new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); + discovery.setLastRoundInSync(testRound); + discovery.setLastRoundProcessed(testRound); + discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.DEGRADED); + + assertFalse("Should not trigger failover when replay state is DEGRADED", + discovery.shouldTriggerFailover()); + } + + // Test Case 11: NOT_INITIALIZED state with all other conditions met - should return false + { + when(tracker.getInProgressFiles()).thenReturn(Collections.emptyList()); + when(tracker.getNewFiles(nextRoundToProcess, currentTimestampRound)) + .thenReturn(Collections.emptyList()); + TestableReplicationLogDiscoveryReplay discovery = + new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); + discovery.setLastRoundInSync(testRound); + discovery.setLastRoundProcessed(testRound); + discovery.setFailoverPending(true); + discovery.setReplicationReplayState( + ReplicationLogDiscoveryReplay.ReplicationReplayState.NOT_INITIALIZED); + + assertFalse("Should not trigger failover when replay state is NOT_INITIALIZED", + discovery.shouldTriggerFailover()); + } + } finally { EnvironmentEdgeManager.reset(); } From a63ece93c6cc562660e88e8afaef003d76d53b49 Mon Sep 17 00:00:00 2001 From: Himanshu Gwalani Date: Sun, 19 Apr 2026 12:20:30 +0530 Subject: [PATCH 2/2] Applying spotless --- .../reader/ReplicationLogDiscoveryReplay.java | 4 +- .../ReplicationLogDiscoveryReplayTestIT.java | 43 +++++++++++-------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java b/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java index bf099dbb0f2..f20ff7ae217 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplay.java @@ -493,8 +493,8 @@ protected HAGroupStoreRecord getHAGroupRecord() throws IOException { * files exist from the next round to process up to the current timestamp round. The fourth * condition checks for new files in the range from nextRoundToProcess (derived from * getLastRoundProcessed()) to currentTimestampRound (derived from current time). This ensures all - * replication logs up to the current time have been processed and any pending rewind has completed - * before transitioning the cluster from STANDBY to ACTIVE state. + * replication logs up to the current time have been processed and any pending rewind has + * completed before transitioning the cluster from STANDBY to ACTIVE state. * @return true if all conditions are met and failover should be triggered, false otherwise * @throws IOException if there's an error checking file status */ diff --git a/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java b/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java index 883e96e57a1..8d7568f4227 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/replication/reader/ReplicationLogDiscoveryReplayTestIT.java @@ -23,6 +23,8 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.io.IOException; @@ -1922,8 +1924,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); - discovery.setReplicationReplayState( - ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertTrue("Should trigger failover when all conditions are met", discovery.shouldTriggerFailover()); @@ -1955,8 +1957,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); - discovery.setReplicationReplayState( - ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse("Should not trigger failover when in-progress files are not empty", discovery.shouldTriggerFailover()); @@ -1973,8 +1975,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); - discovery.setReplicationReplayState( - ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse( "Should not trigger failover when new files exist from next round to current timestamp round", @@ -2024,8 +2026,8 @@ public void testShouldTriggerFailover() throws IOException { discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); - discovery.setReplicationReplayState( - ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.SYNC); assertFalse("Should not trigger failover when both in-progress and new files exist", discovery.shouldTriggerFailover()); @@ -2048,10 +2050,9 @@ public void testShouldTriggerFailover() throws IOException { } // Test Case 9: SYNCED_RECOVERY state with all other conditions met - should return false + // Also verifies short-circuit: no file I/O should occur when state is not SYNC { - when(tracker.getInProgressFiles()).thenReturn(Collections.emptyList()); - when(tracker.getNewFiles(nextRoundToProcess, currentTimestampRound)) - .thenReturn(Collections.emptyList()); + Mockito.clearInvocations(tracker); TestableReplicationLogDiscoveryReplay discovery = new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); discovery.setLastRoundInSync(testRound); @@ -2063,30 +2064,32 @@ public void testShouldTriggerFailover() throws IOException { assertFalse( "Should not trigger failover when replay state is SYNCED_RECOVERY (rewind pending)", discovery.shouldTriggerFailover()); + verify(tracker, never()).getInProgressFiles(); + verify(tracker, never()).getNewFiles(nextRoundToProcess, currentTimestampRound); } // Test Case 10: DEGRADED state with all other conditions met - should return false + // Also verifies short-circuit: no file I/O should occur when state is not SYNC { - when(tracker.getInProgressFiles()).thenReturn(Collections.emptyList()); - when(tracker.getNewFiles(nextRoundToProcess, currentTimestampRound)) - .thenReturn(Collections.emptyList()); + Mockito.clearInvocations(tracker); TestableReplicationLogDiscoveryReplay discovery = new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); discovery.setLastRoundInSync(testRound); discovery.setLastRoundProcessed(testRound); discovery.setFailoverPending(true); - discovery.setReplicationReplayState( - ReplicationLogDiscoveryReplay.ReplicationReplayState.DEGRADED); + discovery + .setReplicationReplayState(ReplicationLogDiscoveryReplay.ReplicationReplayState.DEGRADED); assertFalse("Should not trigger failover when replay state is DEGRADED", discovery.shouldTriggerFailover()); + verify(tracker, never()).getInProgressFiles(); + verify(tracker, never()).getNewFiles(nextRoundToProcess, currentTimestampRound); } // Test Case 11: NOT_INITIALIZED state with all other conditions met - should return false + // Also verifies short-circuit: no file I/O should occur when state is not SYNC { - when(tracker.getInProgressFiles()).thenReturn(Collections.emptyList()); - when(tracker.getNewFiles(nextRoundToProcess, currentTimestampRound)) - .thenReturn(Collections.emptyList()); + Mockito.clearInvocations(tracker); TestableReplicationLogDiscoveryReplay discovery = new TestableReplicationLogDiscoveryReplay(tracker, haGroupStoreRecord); discovery.setLastRoundInSync(testRound); @@ -2097,6 +2100,8 @@ public void testShouldTriggerFailover() throws IOException { assertFalse("Should not trigger failover when replay state is NOT_INITIALIZED", discovery.shouldTriggerFailover()); + verify(tracker, never()).getInProgressFiles(); + verify(tracker, never()).getNewFiles(nextRoundToProcess, currentTimestampRound); } } finally {