diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index d4a18b47..b2cdd268 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -5,6 +5,7 @@ on: push: branches: - main + - feat/gdb-failover # temporary permissions: id-token: write # This is required for requesting the JWT @@ -17,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.11", "3.12", "3.13" ] + python-version: [ "3.11", "3.12", "3.13" ] environment: [ "mysql", "pg" ] steps: diff --git a/aws_advanced_python_wrapper/failover_v2_plugin.py b/aws_advanced_python_wrapper/failover_v2_plugin.py index 578de683..04ec0314 100644 --- a/aws_advanced_python_wrapper/failover_v2_plugin.py +++ b/aws_advanced_python_wrapper/failover_v2_plugin.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Set from aws_advanced_python_wrapper.pep249_methods import DbApiMethod -from aws_advanced_python_wrapper.utils.utils import LogUtils if TYPE_CHECKING: from aws_advanced_python_wrapper.host_list_provider import HostListProviderService @@ -41,6 +40,7 @@ WrapperProperties) from aws_advanced_python_wrapper.utils.rds_url_type import RdsUrlType from aws_advanced_python_wrapper.utils.rds_utils import RdsUtils +from aws_advanced_python_wrapper.utils.retry_util import RetryUtil from aws_advanced_python_wrapper.utils.telemetry.telemetry import \ TelemetryTraceLevel @@ -326,55 +326,32 @@ def _failover_writer(self) -> None: "failover to writer host", TelemetryTraceLevel.NESTED) failover_start_time = time.time() + failover_end_time = failover_start_time + self._failover_timeout_sec + retry_util = RetryUtil() + result = None try: if not self._plugin_service.force_monitoring_refresh_host_list(True, self._failover_timeout_sec): raise FailoverFailedError(Messages.get("FailoverPlugin.UnableToRefreshHostList")) - updated_hosts = self._plugin_service.all_hosts - writer_candidate = next((host for host in updated_hosts if host.role == HostRole.WRITER), None) + result = retry_util.get_writer_connection( + self._plugin_service, self._properties, self, True, failover_end_time) - if writer_candidate is None: - raise FailoverFailedError(Messages.get_formatted( - "FailoverPlugin.NoWriterHostInTopology", - LogUtils.log_topology(updated_hosts))) - - logger.info("FailoverPlugin.FoundWriterCandidate", writer_candidate) - - allowed_hosts = self._plugin_service.hosts - if not any(host.host == writer_candidate.host and host.port == writer_candidate.port - for host in allowed_hosts): - raise FailoverFailedError( - Messages.get_formatted( - "FailoverPlugin.NewWriterNotAllowed", - "" if writer_candidate is None else writer_candidate.host, - LogUtils.log_topology(allowed_hosts))) - - try: - writer_candidate_conn = self._plugin_service.connect(writer_candidate, self._properties, self) - except Exception as e: - raise FailoverFailedError(Messages.get_formatted( - "FailoverPlugin.ExceptionConnectingToWriter", e)) - - role = self._plugin_service.get_host_role(writer_candidate_conn) - if role != HostRole.WRITER: - try: - self._plugin_service.driver_dialect.execute( - DbApiMethod.CONNECTION_CLOSE.method_name, lambda: writer_candidate_conn.close()) - except Exception: - pass - raise FailoverFailedError(Messages.get_formatted( - "FailoverPlugin.WriterFailoverConnectedToReader", - writer_candidate.host)) - - self._plugin_service.set_current_connection(writer_candidate_conn, writer_candidate) - logger.info("FailoverPlugin.EstablishedConnection", self._plugin_service.current_host_info) - self._throw_failover_success_exception() + if result.connection is not None and result.host_info is not None: + self._plugin_service.set_current_connection(result.connection, result.host_info) + result = None # Prevents closing the returned connection in the finally block. + logger.info("FailoverPlugin.EstablishedConnection", self._plugin_service.current_host_info) + self._throw_failover_success_exception() except FailoverSuccessError as ex: if telemetry_context: telemetry_context.set_success(True) telemetry_context.set_exception(ex) raise ex + except TimeoutError as ex: + if telemetry_context: + telemetry_context.set_success(False) + telemetry_context.set_exception(ex) + raise FailoverFailedError(str(ex)) except Exception as ex: if telemetry_context: telemetry_context.set_success(False) @@ -383,6 +360,8 @@ def _failover_writer(self) -> None: finally: elapsed_time = (time.time() - failover_start_time) * 1000 logger.info("FailoverPlugin.WriterFailoverTime", elapsed_time) + if result is not None and result.connection is not self._plugin_service.current_connection: + RetryUtil.close_connection(self._plugin_service, result.connection) if telemetry_context: telemetry_context.close_context() if self._telemetry_failover_additional_top_trace: diff --git a/aws_advanced_python_wrapper/gdb_failover_plugin.py b/aws_advanced_python_wrapper/gdb_failover_plugin.py new file mode 100644 index 00000000..23b8f015 --- /dev/null +++ b/aws_advanced_python_wrapper/gdb_failover_plugin.py @@ -0,0 +1,282 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Callable, List, Optional + +if TYPE_CHECKING: + from aws_advanced_python_wrapper.plugin_service import PluginService + +from aws_advanced_python_wrapper.errors import (AwsWrapperError, + FailoverFailedError, + FailoverSuccessError) +from aws_advanced_python_wrapper.failover_v2_plugin import FailoverV2Plugin +from aws_advanced_python_wrapper.hostinfo import HostInfo, HostRole +from aws_advanced_python_wrapper.plugin import Plugin, PluginFactory +from aws_advanced_python_wrapper.utils.gdb_failover_mode import \ + GlobalDbFailoverMode +from aws_advanced_python_wrapper.utils.log import Logger +from aws_advanced_python_wrapper.utils.messages import Messages +from aws_advanced_python_wrapper.utils.properties import (Properties, + WrapperProperties) +from aws_advanced_python_wrapper.utils.rds_url_type import RdsUrlType +from aws_advanced_python_wrapper.utils.retry_util import RetryUtil +from aws_advanced_python_wrapper.utils.telemetry.telemetry import \ + TelemetryTraceLevel +from aws_advanced_python_wrapper.utils.utils import LogUtils + +logger = Logger(__name__) + + +class GdbFailoverPlugin(FailoverV2Plugin): + + def __init__(self, plugin_service: PluginService, props: Properties): + super().__init__(plugin_service, props) + + self._home_region: Optional[str] = None + self._active_home_failover_mode: Optional[GlobalDbFailoverMode] = None + self._inactive_home_failover_mode: Optional[GlobalDbFailoverMode] = None + self._retry_util = RetryUtil() + + telemetry_factory = self._plugin_service.get_telemetry_factory() + self._failover_writer_triggered_counter = telemetry_factory.create_counter("writer_failover.triggered.count") + self._failover_writer_success_counter = telemetry_factory.create_counter( + "writer_failover.completed.success.count") + self._failover_writer_failed_counter = telemetry_factory.create_counter( + "writer_failover.completed.failed.count") + self._failover_reader_triggered_counter = telemetry_factory.create_counter("reader_failover.triggered.count") + self._failover_reader_success_counter = telemetry_factory.create_counter( + "reader_failover.completed.success.count") + self._failover_reader_failed_counter = telemetry_factory.create_counter( + "reader_failover.completed.failed.count") + + @staticmethod + def _inc(counter) -> None: + if counter is not None: + counter.inc() + + def _init_failover_mode(self) -> None: + if self._rds_url_type is not None: + return + + initial_host_info = self._host_list_provider_service.initial_connection_host_info + if initial_host_info is None: + raise AwsWrapperError(Messages.get("GdbFailoverPlugin.MissingInitialHost")) + self._rds_url_type = self._rds_helper.identify_rds_type(initial_host_info.host) + + self._home_region = WrapperProperties.FAILOVER_HOME_REGION.get(self._properties) + if self._home_region is None or not self._home_region.strip(): + if not self._rds_url_type.has_region: + raise AwsWrapperError(Messages.get("GdbFailoverPlugin.MissingHomeRegion")) + self._home_region = self._rds_helper.get_rds_region(initial_host_info.host) + if self._home_region is None or not self._home_region.strip(): + raise AwsWrapperError(Messages.get("GdbFailoverPlugin.MissingHomeRegion")) + + logger.debug("FailoverPlugin.ParameterValue", "failover_home_region", self._home_region) + + self._active_home_failover_mode = GlobalDbFailoverMode.from_value( + WrapperProperties.ACTIVE_HOME_FAILOVER_MODE.get(self._properties)) + self._inactive_home_failover_mode = GlobalDbFailoverMode.from_value( + WrapperProperties.INACTIVE_HOME_FAILOVER_MODE.get(self._properties)) + + default_mode = GlobalDbFailoverMode.STRICT_WRITER \ + if self._rds_url_type in (RdsUrlType.RDS_WRITER_CLUSTER, RdsUrlType.RDS_GLOBAL_WRITER_CLUSTER) \ + else GlobalDbFailoverMode.HOME_READER_OR_WRITER + if self._active_home_failover_mode is None: + self._active_home_failover_mode = default_mode + if self._inactive_home_failover_mode is None: + self._inactive_home_failover_mode = default_mode + + logger.debug("FailoverPlugin.ParameterValue", "active_home_failover_mode", self._active_home_failover_mode) + logger.debug("FailoverPlugin.ParameterValue", "inactive_home_failover_mode", self._inactive_home_failover_mode) + + def _is_home_region(self, region: Optional[str]) -> bool: + return self._home_region is not None and region is not None \ + and self._home_region.casefold() == region.casefold() + + def _failover(self) -> None: + context = self._plugin_service.get_telemetry_factory().open_telemetry_context( + "failover", TelemetryTraceLevel.NESTED) + + failover_start_time = time.time() + failover_end_time = failover_start_time + self._failover_timeout_sec + try: + logger.info("GdbFailoverPlugin.StartFailover") + + # It's expected that this synchronously returns once topology is stabilized, + # i.e. when the cluster control plane has already chosen a new writer. + if not self._plugin_service.force_monitoring_refresh_host_list(True, self._failover_timeout_sec): + # Assume this is a writer failover for telemetry purposes. + self._inc(self._failover_writer_triggered_counter) + self._inc(self._failover_writer_failed_counter) + logger.error("FailoverPlugin.UnableToRefreshHostList") + raise FailoverFailedError(Messages.get("FailoverPlugin.UnableToRefreshHostList")) + + updated_hosts = self._plugin_service.all_hosts + writer_candidate = next((host for host in updated_hosts if host.role == HostRole.WRITER), None) + + if writer_candidate is None: + self._inc(self._failover_writer_triggered_counter) + self._inc(self._failover_writer_failed_counter) + message = Messages.get_formatted( + "FailoverPlugin.NoWriterHostInTopology", LogUtils.log_topology(updated_hosts)) + logger.error(message) + raise FailoverFailedError(message) + + # Check writer region + writer_region = self._rds_helper.get_rds_region(writer_candidate.host) + is_home_region = self._is_home_region(writer_region) + logger.debug("GdbFailoverPlugin.IsHomeRegion", is_home_region) + + current_failover_mode = self._active_home_failover_mode if is_home_region \ + else self._inactive_home_failover_mode + logger.debug("GdbFailoverPlugin.CurrentFailoverMode", current_failover_mode) + + self._failover_with_mode(current_failover_mode, writer_candidate, failover_end_time) + + logger.info("FailoverPlugin.EstablishedConnection", self._plugin_service.current_host_info) + self._throw_failover_success_exception() + + except FailoverSuccessError as ex: + if context: + context.set_success(True) + context.set_exception(ex) + raise ex + except Exception as ex: + if context: + context.set_success(False) + context.set_exception(ex) + raise ex + finally: + elapsed_time = (time.time() - failover_start_time) * 1000 + logger.debug("GdbFailoverPlugin.FailoverElapsed", elapsed_time) + if context: + context.close_context() + if self._telemetry_failover_additional_top_trace: + self._plugin_service.get_telemetry_factory().post_copy( + context, TelemetryTraceLevel.FORCE_TOP_LEVEL) + + def _failover_with_mode( + self, + mode: Optional[GlobalDbFailoverMode], + writer_candidate: HostInfo, + failover_end_time: float) -> None: + match mode: + case GlobalDbFailoverMode.STRICT_WRITER: + self._failover_to_writer(writer_candidate, failover_end_time) + case GlobalDbFailoverMode.STRICT_HOME_READER: + self._failover_to_allowed_host( + lambda: [host for host in self._plugin_service.hosts + if host.role == HostRole.READER + and self._is_home_region(self._rds_helper.get_rds_region(host.host))], + HostRole.READER, + failover_end_time) + case GlobalDbFailoverMode.STRICT_OUT_OF_HOME_READER: + self._failover_to_allowed_host( + lambda: [host for host in self._plugin_service.hosts + if host.role == HostRole.READER + and not self._is_home_region(self._rds_helper.get_rds_region(host.host))], + HostRole.READER, + failover_end_time) + case GlobalDbFailoverMode.STRICT_ANY_READER: + self._failover_to_allowed_host( + lambda: [host for host in self._plugin_service.hosts if host.role == HostRole.READER], + HostRole.READER, + failover_end_time) + case GlobalDbFailoverMode.HOME_READER_OR_WRITER: + self._failover_to_allowed_host( + lambda: [host for host in self._plugin_service.hosts + if host.role == HostRole.WRITER + or (host.role == HostRole.READER + and self._is_home_region(self._rds_helper.get_rds_region(host.host)))], + None, + failover_end_time) + case GlobalDbFailoverMode.OUT_OF_HOME_READER_OR_WRITER: + self._failover_to_allowed_host( + lambda: [host for host in self._plugin_service.hosts + if host.role == HostRole.WRITER + or (host.role == HostRole.READER + and not self._is_home_region(self._rds_helper.get_rds_region(host.host)))], + None, + failover_end_time) + case GlobalDbFailoverMode.ANY_READER_OR_WRITER: + self._failover_to_allowed_host( + lambda: list(self._plugin_service.hosts), + None, + failover_end_time) + case _: + raise AwsWrapperError(Messages.get_formatted("GdbFailoverPlugin.UnsupportedFailoverMode", mode)) + + def _failover_to_writer(self, writer_candidate: HostInfo, failover_end_time: float) -> None: + self._inc(self._failover_writer_triggered_counter) + + result = None + try: + result = self._retry_util.get_writer_connection( + self._plugin_service, self._properties, self, True, failover_end_time) + self._plugin_service.set_current_connection(result.connection, result.host_info) + result = None # Prevents closing the returned connection in the finally block. + self._inc(self._failover_writer_success_counter) + except TimeoutError: + self._inc(self._failover_writer_failed_counter) + logger.error("FailoverPlugin.ExceptionConnectingToWriter", writer_candidate.host) + raise FailoverFailedError( + Messages.get_formatted("FailoverPlugin.ExceptionConnectingToWriter", writer_candidate.host)) + finally: + if result is not None and result.connection is not self._plugin_service.current_connection: + RetryUtil.close_connection(self._plugin_service, result.connection) + + def _failover_to_allowed_host( + self, + allowed_hosts: Callable[[], Optional[List[HostInfo]]], + verify_role: Optional[HostRole], + failover_end_time: float) -> None: + self._inc(self._failover_reader_triggered_counter) + + result = None + try: + result = self._retry_util.get_allowed_connection( + self._plugin_service, + self._properties, + self, + allowed_hosts, + self._failover_reader_host_selector_strategy, + verify_role, + failover_end_time) + self._plugin_service.set_current_connection(result.connection, result.host_info) + result = None # Prevents closing the returned connection in the finally block. + self._inc(self._failover_reader_success_counter) + except TimeoutError: + self._inc(self._failover_reader_failed_counter) + logger.error("FailoverPlugin.UnableToConnectToReader") + raise FailoverFailedError(Messages.get("FailoverPlugin.UnableToConnectToReader")) + finally: + if result is not None and result.connection is not self._plugin_service.current_connection: + RetryUtil.close_connection(self._plugin_service, result.connection) + + def _failover_reader(self) -> None: + # Not used by the GDB Failover Plugin. See _failover() for implementation details. + raise AwsWrapperError(Messages.get_formatted("Plugin.UnsupportedMethod", "_failover_reader")) + + def _failover_writer(self) -> None: + # Not used by the GDB Failover Plugin. See _failover() for implementation details. + raise AwsWrapperError(Messages.get_formatted("Plugin.UnsupportedMethod", "_failover_writer")) + + +class GdbFailoverPluginFactory(PluginFactory): + @staticmethod + def get_instance(plugin_service: PluginService, props: Properties) -> Plugin: + return GdbFailoverPlugin(plugin_service, props) diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index 82189c3d..72d16455 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -29,6 +29,8 @@ FastestResponseStrategyPluginFactory from aws_advanced_python_wrapper.federated_plugin import \ FederatedAuthPluginFactory +from aws_advanced_python_wrapper.gdb_failover_plugin import \ + GdbFailoverPluginFactory from aws_advanced_python_wrapper.limitless_plugin import LimitlessPluginFactory from aws_advanced_python_wrapper.okta_plugin import OktaAuthPluginFactory from aws_advanced_python_wrapper.states.session_state_service import ( @@ -828,6 +830,7 @@ class PluginManager(CanReleaseResources): "host_monitoring_v2": HostMonitoringV2PluginFactory, "failover": FailoverPluginFactory, "failover_v2": FailoverV2PluginFactory, + "gdb_failover": GdbFailoverPluginFactory, "read_write_splitting": ReadWriteSplittingPluginFactory, "srw": SimpleReadWriteSplittingPluginFactory, "fastest_response_strategy": FastestResponseStrategyPluginFactory, @@ -857,6 +860,7 @@ class PluginManager(CanReleaseResources): SimpleReadWriteSplittingPluginFactory: 310, FailoverPluginFactory: 400, FailoverV2PluginFactory: 410, + GdbFailoverPluginFactory: 420, HostMonitoringPluginFactory: 500, HostMonitoringV2PluginFactory: 510, BlueGreenPluginFactory: 550, diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties index b5f0da3c..d73c2367 100644 --- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties +++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties @@ -170,6 +170,22 @@ FailoverPlugin.WriterFailoverConnectedToReader=[Failover] Writer failover unexpe FailoverPlugin.WriterFailoverTime=[Failover] Writer failover elapsed: {}ms. FailoverPlugin.ReaderFailoverTime=[Failover] Reader failover elapsed: {}ms. +RetryUtil.NoWriterHost=[RetryUtil] Unable to find writer in updated host list: {} +RetryUtil.NewWriterNotAllowed=[RetryUtil] The failover process identified the new writer but the host is not in the list of allowed hosts. New writer host: '{}'. Allowed hosts: {} +RetryUtil.ExceptionConnectingToWriter=[RetryUtil] An exception occurred while trying to connect to the new writer '{}': {} +RetryUtil.CandidateNone=[RetryUtil] Unable to find a host with role '{}' in updated host list. +RetryUtil.Timeout=[RetryUtil] Not able to establish a connection before timing out. + +GdbFailoverPlugin.StartFailover=[GdbFailover] Starting Global Database failover procedure. +GdbFailoverPlugin.MissingInitialHost=[GdbFailover] Unable to retrieve the initial host info. Ensure connection string is correctly passed into the initial connection. +GdbFailoverPlugin.MissingHomeRegion=[GdbFailover] A home region is required. Set the 'failover_home_region' parameter or use a connection endpoint that includes a region. +GdbFailoverPlugin.IsHomeRegion=[GdbFailover] Global Database primary region is home region: {} +GdbFailoverPlugin.CurrentFailoverMode=[GdbFailover] Current failover mode: {} +GdbFailoverPlugin.FailoverElapsed=[GdbFailover] Failover elapsed: {}ms. +GdbFailoverPlugin.UnsupportedFailoverMode=[GdbFailover] Unsupported failover mode: {} + +GlobalDbFailoverMode.InvalidValue=[GlobalDbFailoverMode] Invalid Global Database failover mode value: '{}'. + FastestResponseStrategyPlugin.RandomHostSelected=[FastestResponseStrategyPlugin] Fastest host not calculated. Random host selected instead. FastestResponseStrategyPlugin.UnsupportedHostSelectorStrategy=[FastestResponseStrategyPlugin] Unsupported host selector strategy: '{}'. To use the fastest response strategy plugin, please ensure the property reader_host_selector_strategy is set to fastest_response. diff --git a/aws_advanced_python_wrapper/utils/gdb_failover_mode.py b/aws_advanced_python_wrapper/utils/gdb_failover_mode.py new file mode 100644 index 00000000..441d15e7 --- /dev/null +++ b/aws_advanced_python_wrapper/utils/gdb_failover_mode.py @@ -0,0 +1,52 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from enum import Enum, auto +from typing import Optional + +from aws_advanced_python_wrapper.errors import AwsWrapperError +from aws_advanced_python_wrapper.utils.messages import Messages + + +class GlobalDbFailoverMode(Enum): + STRICT_WRITER = auto() + STRICT_HOME_READER = auto() + STRICT_OUT_OF_HOME_READER = auto() + STRICT_ANY_READER = auto() + HOME_READER_OR_WRITER = auto() + OUT_OF_HOME_READER_OR_WRITER = auto() + ANY_READER_OR_WRITER = auto() + + @classmethod + def from_value(cls, value: Optional[str]) -> Optional["GlobalDbFailoverMode"]: + if value is None or not value.strip(): + return None + + normalized = value.strip().lower().replace("_", "-") + mode = _NAME_TO_VALUE.get(normalized) + if mode is None: + raise AwsWrapperError( + Messages.get_formatted("GlobalDbFailoverMode.InvalidValue", value)) + return mode + + +_NAME_TO_VALUE = { + "strict-writer": GlobalDbFailoverMode.STRICT_WRITER, + "strict-home-reader": GlobalDbFailoverMode.STRICT_HOME_READER, + "strict-out-of-home-reader": GlobalDbFailoverMode.STRICT_OUT_OF_HOME_READER, + "strict-any-reader": GlobalDbFailoverMode.STRICT_ANY_READER, + "home-reader-or-writer": GlobalDbFailoverMode.HOME_READER_OR_WRITER, + "out-of-home-reader-or-writer": GlobalDbFailoverMode.OUT_OF_HOME_READER_OR_WRITER, + "any-reader-or-writer": GlobalDbFailoverMode.ANY_READER_OR_WRITER, +} diff --git a/aws_advanced_python_wrapper/utils/properties.py b/aws_advanced_python_wrapper/utils/properties.py index efec458f..75a2518f 100644 --- a/aws_advanced_python_wrapper/utils/properties.py +++ b/aws_advanced_python_wrapper/utils/properties.py @@ -290,6 +290,27 @@ class WrapperProperties: "Enable/disable cluster-aware failover if the initial connection to the database fails due to a network exception.", False) + # GdbFailoverPlugin properties + FAILOVER_HOME_REGION = WrapperProperty( + "failover_home_region", + """Defines the home region for Global Database failover. Examples: 'us-west-2', 'us-east-1'. + If omitted, the value is parsed from the connection url when the endpoint includes a region. + This parameter is required when connecting using an IP address, custom domain, or Global Database + endpoint that has no region.""") + ACTIVE_HOME_FAILOVER_MODE = WrapperProperty( + "active_home_failover_mode", + """The failover mode to use when the Global Database primary region is the home region. + Possible values: strict-writer, strict-home-reader, strict-out-of-home-reader, strict-any-reader, + home-reader-or-writer, out-of-home-reader-or-writer, any-reader-or-writer. If omitted, the default + depends on the connection url (strict-writer for a writer/global writer cluster endpoint, otherwise + home-reader-or-writer).""") + INACTIVE_HOME_FAILOVER_MODE = WrapperProperty( + "inactive_home_failover_mode", + """The failover mode to use when the Global Database primary region is not the home region. + Possible values are the same as for active_home_failover_mode. If omitted, the default depends on + the connection url (strict-writer for a writer/global writer cluster endpoint, otherwise + home-reader-or-writer).""") + # ClusterTopologyMonitor properties CLUSTER_TOPOLOGY_HIGH_REFRESH_RATE_MS = WrapperProperty( "cluster_topology_high_refresh_rate_ms", diff --git a/aws_advanced_python_wrapper/utils/rds_utils.py b/aws_advanced_python_wrapper/utils/rds_utils.py index e8cce41c..aed146ca 100644 --- a/aws_advanced_python_wrapper/utils/rds_utils.py +++ b/aws_advanced_python_wrapper/utils/rds_utils.py @@ -108,7 +108,7 @@ class RdsUtils: r"(?Pcluster-|cluster-ro-)+" \ r"(?P[a-zA-Z0-9]+\.rds\.(?P[a-zA-Z0-9\-]+)" \ r"\.(amazonaws\.com|c2s\.ic\.gov|sc2s\.sgov\.gov))$" - ELB_PATTERN = r"^(?.+)\.elb\.((?[a-zA-Z0-9\-]+)\.amazonaws\.com)$" + ELB_PATTERN = r"^(?P.+)\.elb\.((?P[a-zA-Z0-9\-]+)\.amazonaws\.com)$" IP_V4 = r"^(([1-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){1}" \ r"(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){2}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])" diff --git a/aws_advanced_python_wrapper/utils/retry_util.py b/aws_advanced_python_wrapper/utils/retry_util.py new file mode 100644 index 00000000..c461be35 --- /dev/null +++ b/aws_advanced_python_wrapper/utils/retry_util.py @@ -0,0 +1,171 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import time +from typing import TYPE_CHECKING, Callable, List, Optional + +from aws_advanced_python_wrapper.host_availability import HostAvailability +from aws_advanced_python_wrapper.hostinfo import HostInfo, HostRole +from aws_advanced_python_wrapper.pep249_methods import DbApiMethod +from aws_advanced_python_wrapper.utils.log import Logger +from aws_advanced_python_wrapper.utils.messages import Messages +from aws_advanced_python_wrapper.utils.utils import LogUtils + +if TYPE_CHECKING: + from aws_advanced_python_wrapper.pep249 import Connection + from aws_advanced_python_wrapper.plugin import Plugin + from aws_advanced_python_wrapper.plugin_service import PluginService + from aws_advanced_python_wrapper.utils.properties import Properties + +logger = Logger(__name__) + + +class RetryUtil: + _SHORT_DELAY_SEC = 0.1 + + class Results: + def __init__(self, connection: Connection, host_info: HostInfo): + self._connection = connection + self._host_info = host_info + + @property + def connection(self) -> Connection: + return self._connection + + @property + def host_info(self) -> HostInfo: + return self._host_info + + def get_writer_connection( + self, + plugin_service: PluginService, + properties: Properties, + plugin: Optional[Plugin], + verify_role: bool, + timeout_end_time: float) -> RetryUtil.Results: + def allowed_writer_hosts() -> Optional[List[HostInfo]]: + updated_hosts = plugin_service.all_hosts + writer = next((host for host in updated_hosts if host.role == HostRole.WRITER), None) + if writer is None: + logger.debug("RetryUtil.NoWriterHost", LogUtils.log_topology(updated_hosts)) + return None + + allowed_hosts = plugin_service.hosts + if not any(host.host == writer.host and host.port == writer.port for host in allowed_hosts): + logger.debug("RetryUtil.NewWriterNotAllowed", writer.host, LogUtils.log_topology(allowed_hosts)) + return None + return [writer] + + return self.get_allowed_connection( + plugin_service, + properties, + plugin, + allowed_writer_hosts, + None, + HostRole.WRITER if verify_role else None, + timeout_end_time) + + def get_allowed_connection( + self, + plugin_service: PluginService, + properties: Properties, + plugin: Optional[Plugin], + allowed_hosts: Callable[[], Optional[List[HostInfo]]], + strategy: Optional[str], + verify_role: Optional[HostRole], + retry_end_time: float) -> RetryUtil.Results: + if strategy is None or not strategy.strip(): + strategy = "random" + + candidate_conn: Optional[Connection] = None + try: + while time.time() < retry_end_time: + # The roles in this list might not be accurate, depending on whether the new + # topology has become available yet. + plugin_service.refresh_host_list() + updated_allowed_hosts = allowed_hosts() + if updated_allowed_hosts is None: + self._short_delay() + continue + + # Make a copy of hosts and mark them available so the host selector considers them. + remaining_hosts = [self._available_copy(host) for host in updated_allowed_hosts] + if not remaining_hosts: + self._short_delay() + continue + + while remaining_hosts and time.time() < retry_end_time: + candidate_host = None + try: + # The host selector requires a non-null role, so default to READER when + # no specific role needs to be verified. + candidate_host = plugin_service.get_host_info_by_strategy( + verify_role if verify_role is not None else HostRole.READER, + strategy, + remaining_hosts) + except Exception: + # Strategy can't get a host according to the requested conditions. + # Do nothing + pass + + if candidate_host is None: + logger.debug("RetryUtil.CandidateNone", verify_role) + self._short_delay() + break # Exit loop over remaining_hosts and refresh topology. + + try: + candidate_conn = plugin_service.connect(candidate_host, properties, plugin) + # Roles in the host list might be stale, so verify the role with a query. + role = plugin_service.get_host_role(candidate_conn) if verify_role is not None else None + if verify_role is None or verify_role == role: + updated_host_info = HostInfo( + candidate_host.host, + candidate_host.port, + role if role is not None else candidate_host.role) + result = RetryUtil.Results(candidate_conn, updated_host_info) + candidate_conn = None # Prevents closing the returned connection below. + return result + except Exception as ex: + logger.debug("RetryUtil.ExceptionConnectingToWriter", candidate_host.host, ex) + + # The connection couldn't be opened or the role is not as expected, so it is not valid. + remaining_hosts = [host for host in remaining_hosts + if not (host.host == candidate_host.host and host.port == candidate_host.port)] + if candidate_conn is not None: + self.close_connection(plugin_service, candidate_conn) + candidate_conn = None + + raise TimeoutError(Messages.get("RetryUtil.Timeout")) + finally: + if candidate_conn is not None: + self.close_connection(plugin_service, candidate_conn) + + @staticmethod + def _available_copy(host: HostInfo) -> HostInfo: + host_copy = host.__copy__() + host_copy.set_availability(HostAvailability.AVAILABLE) + return host_copy + + @staticmethod + def close_connection(plugin_service: PluginService, conn: Connection) -> None: + try: + plugin_service.driver_dialect.execute( + DbApiMethod.CONNECTION_CLOSE.method_name, lambda: conn.close()) + except Exception: + pass + + def _short_delay(self) -> None: + time.sleep(self._SHORT_DELAY_SEC) diff --git a/docs/using-the-python-wrapper/using-plugins/UsingTheGdbFailoverPlugin.md b/docs/using-the-python-wrapper/using-plugins/UsingTheGdbFailoverPlugin.md new file mode 100644 index 00000000..fed616bd --- /dev/null +++ b/docs/using-the-python-wrapper/using-plugins/UsingTheGdbFailoverPlugin.md @@ -0,0 +1,106 @@ +# Global Database (GDB) Failover Plugin +The AWS Advanced Python Wrapper uses the GDB Failover Plugin to provide minimal downtime in the event of a DB instance failure for Amazon Aurora Global Databases. The plugin is based on the [Failover Plugin v2](./UsingTheFailover2Plugin.md) and unless explicitly stated otherwise, most of the information and suggestions for the [Failover Plugin](./UsingTheFailoverPlugin.md) and [Failover Plugin v2](./UsingTheFailover2Plugin.md) are applicable to the GDB Failover Plugin. + +## Differences between the GDB Failover Plugin and the Failover Plugin v2 + +The GDB Failover Plugin introduces the notion of a *home region* and extends the configuration parameters to allow setting different failover logic for **in-home** and **out-of-home** cases. + +Driver configuration for the GDB Failover Plugin should include a home region defined with an AWS region name. This introduces two cases: +- **in-home**: when the primary region of the Global Database is the same as the specified home region. +- **out-of-home**: when GDB switches over to another region and the primary region is not the same as the specified home region. + +Users are allowed to specify different failover logic individually for the **in-home** and **out-of-home** cases. The following are two examples where in-home and out-of-home cases impact the failover logic. + +**Example 1** + +When a user application needs a writer connection, it makes sense to define the failover mode to follow the writer (`strict-writer`). However, some applications may choose not to follow a writer node when cross-region failover occurs (see [Configuration Example 3](#configuration-example-3) below). +- **in-home**: when in-region failover occurs, the wrapper reconnects to a new writer node and continues serving the user application with a write connection. +- **out-of-home**: the application turns to inactive mode and stops performing writes, prioritizing performance due to reduced connection latency over being connected to a writer node in another region. + +**Example 2** + +When a user application needs a reader connection, prioritize reader connections in the home region to reduce connection latency (see [Configuration Example 2](#configuration-example-2) below). +- **in-home**: the primary region of the Global Database is the same as the specified home region; connect to any such reader. +- **out-of-home**: connect to readers that are in the specified home region; these readers are no longer part of the primary region of the Global Database. + +The mentioned scenarios can be configured with the GDB Failover Plugin. The [Failover Plugin v2](./UsingTheFailover2Plugin.md) and [Failover Plugin](./UsingTheFailoverPlugin.md) don't support the home region and can't be used for the cases mentioned above. + +## Using the GDB Failover Plugin +The GDB Failover Plugin will not be enabled by default. To enable it, explicitly include the plugin code `gdb_failover` in the [`plugins`](../UsingThePythonWrapper.md#connection-plugin-manager-parameters) value, or add it to the current [driver profile](../UsingThePythonWrapper.md#connection-plugin-manager-parameters). After you load the plugin, the failover feature will be enabled. + +Please refer to the [failover configuration guide](../FailoverConfigurationGuide.md) for tips to keep in mind when using the failover plugins. + +> [!WARNING] +> Do not use the `gdb_failover`, `failover`, and/or `failover_v2` plugins (or their combination) at the same time for the same connection! + +The GDB Failover Plugin requires a Global Aurora dialect. Set `dialect` to `global-aurora-mysql` or `global-aurora-pg`, and provide the per-region instance host patterns via `global_cluster_instance_host_patterns`. For complete Aurora Global Database configuration, see the [Aurora Global Databases](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-global-database.html) documentation. + +### GDB Failover Plugin Configuration Parameters +In addition to the parameters that you can configure for the underlying driver, you can pass the following parameters for the AWS Advanced Python Wrapper to specify additional failover behavior. + +| Parameter | Value | Required | Description | Default Value | +|------------------------------------------|:--------|:------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `failover_home_region` | String | If connecting using an IP address, a custom domain URL, a Global Database endpoint, or another endpoint with no region: Yes

Otherwise: No | Defines a home region. Examples: `us-west-2`, `us-east-1`. If this parameter is omitted, the value is parsed from the connection url. For regional cluster endpoints and instance endpoints, it's set to the region of the provided endpoint. If the provided endpoint has no region (for example, a Global Database endpoint or IP address) this parameter is mandatory. | | +| `active_home_failover_mode` | String | No | Defines the failover mode when the GDB primary region **is the home region**. Possible values: `strict-writer`, `strict-home-reader`, `strict-out-of-home-reader`, `strict-any-reader`, `home-reader-or-writer`, `out-of-home-reader-or-writer`, `any-reader-or-writer`. See [Failover Modes](#failover-modes) below. | Depends on connection url. For an Aurora writer cluster endpoint or Global Database endpoint, it's `strict-writer`. Otherwise, it's `home-reader-or-writer`. | +| `inactive_home_failover_mode` | String | No | Defines the failover mode when the GDB primary region **is not the home region**. Possible values are the same as for `active_home_failover_mode`. | Depends on connection url. For an Aurora writer cluster endpoint or Global Database endpoint, it's `strict-writer`. Otherwise, it's `home-reader-or-writer`. | +| `global_cluster_instance_host_patterns` | String | For Global Databases: Yes

Otherwise: No | A comma-separated list of instance host patterns, one per region of the Global Database. A `?` character in each pattern is a placeholder for the DB instance identifiers. Each entry uses the format `region:host:port` or `region:host`. Example: `us-east-2:?.XYZ1.us-east-2.rds.amazonaws.com,us-west-2:?.XYZ2.us-west-2.rds.amazonaws.com`. | | +| `failover_timeout_sec` | Integer | No | Maximum allowed time in seconds to attempt reconnecting to a new writer or reader instance after a cluster failover is initiated. | `300` | +| `cluster_topology_high_refresh_rate_ms` | Integer | No | Interval of time in milliseconds to wait between attempts to update cluster topology after the writer has come back online following a failover event. | `100` | +| `failover_reader_host_selector_strategy` | String | No | Strategy used to select a reader node during failover. For more information on the available reader selection strategies, see this [table](../ReaderSelectionStrategies.md). | `random` | +| `telemetry_failover_additional_top_trace`| Boolean | No | Allows the wrapper to produce an additional telemetry span associated with failover. | `False` | + +### Failover Modes +The `active_home_failover_mode` and `inactive_home_failover_mode` parameters accept the following values: + +- `strict-writer` - Failover follows the writer node and connects to a new writer (in any region) when it changes. +- `strict-home-reader` - Connect to any available reader node in the home region. If no reader is available, raise an error. +- `strict-out-of-home-reader` - Connect to any available reader node that is *not* in the home region. If no reader is available, raise an error. +- `strict-any-reader` - Connect to any available reader node in any region. If no reader is available, raise an error. +- `home-reader-or-writer` - Connect to any available reader node in the home region. If no such reader is available, connect to a writer node (in any region). +- `out-of-home-reader-or-writer` - Connect to any available reader node that is *not* in the home region. If no such reader is available, connect to a writer node (in any region). +- `any-reader-or-writer` - Connect to any available reader or writer node in any region. + +Please refer to the original [Failover Plugin](./UsingTheFailoverPlugin.md) and [Failover Plugin v2](./UsingTheFailover2Plugin.md) for more details about error codes, configurations, connection pooling, and sample code. + +### Failover Configuration Examples + +#### Configuration Example 1 +**Goal:** Provide a user application with a writer connection. The application is deployed in the `us-west-1` region and connects to a Global Database with the `us-east-1`, `us-east-2`, and `us-west-1` regions. + +**Solution:** Use the following configuration parameters: +- `failover_home_region=us-west-1` +- `active_home_failover_mode=strict-writer` +- `inactive_home_failover_mode=strict-writer` +- `global_cluster_instance_host_patterns=us-east-1:?.XYZ1.us-east-1.rds.amazonaws.com,us-east-2:?.XYZ2.us-east-2.rds.amazonaws.com,us-west-1:?.XYZ3.us-west-1.rds.amazonaws.com` (replace `XYZ1`, `XYZ2`, `XYZ3` with values that correspond to your database) +- `dialect=global-aurora-mysql` (or `global-aurora-pg`) +- `plugins=initial_connection,gdb_failover,host_monitoring_v2` +- use the Global Database endpoint in your connection string + +#### Configuration Example 2 +**Goal:** Provide a user application with a reader connection in `us-west-1`. The application is deployed in the `us-west-1` region. If the GDB primary region switches over, prioritize reader connections from the `us-west-1` region. + +**Solution:** Use the following configuration parameters: +- `failover_home_region=us-west-1` +- `active_home_failover_mode=strict-home-reader` +- `inactive_home_failover_mode=strict-home-reader` +- `global_cluster_instance_host_patterns=us-east-1:?.XYZ1.us-east-1.rds.amazonaws.com,us-east-2:?.XYZ2.us-east-2.rds.amazonaws.com,us-west-1:?.XYZ3.us-west-1.rds.amazonaws.com` +- `dialect=global-aurora-mysql` (or `global-aurora-pg`) +- `plugins=initial_connection,gdb_failover,host_monitoring_v2` +- use the cluster reader endpoint in region `us-west-1` in your connection string + +#### Configuration Example 3 +**Goal:** Provide a user application with a writer connection when the primary region is closest to the deployed region. When the primary region switches over to another region and network latency becomes unacceptable, the application deployment in region `us-west-1` becomes inactive and lets applications in other regions process business transactions. + +**Solution:** Use the following configuration parameters for the application deployment in the `us-west-1` region: +- `failover_home_region=us-west-1` +- `active_home_failover_mode=strict-writer` +- `inactive_home_failover_mode=strict-any-reader` +- `global_cluster_instance_host_patterns=us-east-1:?.XYZ1.us-east-1.rds.amazonaws.com,us-east-2:?.XYZ2.us-east-2.rds.amazonaws.com,us-west-1:?.XYZ3.us-west-1.rds.amazonaws.com` +- `dialect=global-aurora-mysql` (or `global-aurora-pg`) +- `plugins=initial_connection,gdb_failover,host_monitoring_v2` +- use the cluster writer endpoint in region `us-west-1` in your connection string + +**Explanation:** While the primary region of the GDB is `us-west-1`, the application deployed in this region needs writable connections. By using the cluster writer endpoint, the wrapper opens connections to a writer node. If in-region failover occurs, the wrapper reconnects to a new writer node in the same `us-west-1` region (in-home, `active_home_failover_mode=strict-writer`). When a cross-region failover event occurs and the new writer is, for example, in `us-east-1`, the wrapper uses `inactive_home_failover_mode=strict-any-reader` and serves a reader connection from any available region. + +> [!WARNING] +> These examples cover failover and failover-related settings only. A complete driver configuration may require settings for other plugins. diff --git a/tests/integration/container/test_aurora_failover.py b/tests/integration/container/test_aurora_failover.py index 18c25c9f..883a47b1 100644 --- a/tests/integration/container/test_aurora_failover.py +++ b/tests/integration/container/test_aurora_failover.py @@ -79,6 +79,10 @@ def props(self): "cluster_id": "cluster1" }) + WrapperProperties.FAILOVER_HOME_REGION.set(p, TestEnvironment.get_current().get_aurora_region()) + WrapperProperties.ACTIVE_HOME_FAILOVER_MODE.set(p, "strict-writer") + WrapperProperties.INACTIVE_HOME_FAILOVER_MODE.set(p, "strict-writer") + features = TestEnvironment.get_current().get_features() if TestEnvironmentFeatures.TELEMETRY_TRACES_ENABLED in features \ or TestEnvironmentFeatures.TELEMETRY_METRICS_ENABLED in features: @@ -98,7 +102,7 @@ def proxied_props(self, props, conn_utils): WrapperProperties.CLUSTER_INSTANCE_HOST_PATTERN.set(props_copy, f"?.{endpoint_suffix}:{conn_utils.proxy_port}") return props_copy - @pytest.mark.parametrize("plugins", ["failover", "failover_v2"]) + @pytest.mark.parametrize("plugins", ["failover", "failover_v2", "gdb_failover"]) @enable_on_features([TestEnvironmentFeatures.FAILOVER_SUPPORTED]) def test_fail_from_writer_to_new_writer_fail_on_connection_invocation( self, test_driver: TestDriver, props, conn_utils, aurora_utility, plugins): @@ -119,7 +123,7 @@ def test_fail_from_writer_to_new_writer_fail_on_connection_invocation( assert aurora_utility.is_db_instance_writer(current_connection_id) is True assert current_connection_id != initial_writer_id - @pytest.mark.parametrize("plugins", ["failover", "failover_v2"]) + @pytest.mark.parametrize("plugins", ["failover", "failover_v2", "gdb_failover"]) @enable_on_features([TestEnvironmentFeatures.FAILOVER_SUPPORTED]) def test_fail_from_writer_to_new_writer_fail_on_connection_bound_object_invocation( self, test_driver: TestDriver, props, conn_utils, aurora_utility, plugins): @@ -141,7 +145,8 @@ def test_fail_from_writer_to_new_writer_fail_on_connection_bound_object_invocati assert current_connection_id != initial_writer_id @pytest.mark.parametrize("plugins", ["failover,host_monitoring", "failover,host_monitoring_v2", - "failover_v2,host_monitoring", "failover_v2,host_monitoring_v2"]) + "failover_v2,host_monitoring", "failover_v2,host_monitoring_v2", + "gdb_failover,host_monitoring", "gdb_failover,host_monitoring_v2"]) @enable_on_features([TestEnvironmentFeatures.NETWORK_OUTAGES_ENABLED, TestEnvironmentFeatures.ABORT_CONNECTION_SUPPORTED]) def test_fail_from_reader_to_writer( @@ -168,7 +173,7 @@ def test_fail_from_reader_to_writer( assert writer_id == current_connection_id assert aurora_utility.is_db_instance_writer(current_connection_id) is True - @pytest.mark.parametrize("plugins", ["failover", "failover_v2"]) + @pytest.mark.parametrize("plugins", ["failover", "failover_v2", "gdb_failover"]) @enable_on_features([TestEnvironmentFeatures.FAILOVER_SUPPORTED]) def test_fail_from_writer_with_session_states_autocommit(self, test_driver: TestDriver, props, conn_utils, aurora_utility, plugins): @@ -209,7 +214,7 @@ def test_fail_from_writer_with_session_states_autocommit(self, test_driver: Test # Assert autocommit is still False after failover. assert conn.autocommit is False - @pytest.mark.parametrize("plugins", ["failover", "failover_v2"]) + @pytest.mark.parametrize("plugins", ["failover", "failover_v2", "gdb_failover"]) @enable_on_features([TestEnvironmentFeatures.FAILOVER_SUPPORTED]) def test_fail_from_writer_with_session_states_readonly(self, test_driver: TestDriver, props, conn_utils, aurora_utility, plugins): @@ -237,7 +242,7 @@ def test_fail_from_writer_with_session_states_readonly(self, test_driver: TestDr # Assert readonly is still True after failover. assert conn.read_only is True - @pytest.mark.parametrize("plugins", ["failover", "failover_v2"]) + @pytest.mark.parametrize("plugins", ["failover", "failover_v2", "gdb_failover"]) @enable_on_features([TestEnvironmentFeatures.FAILOVER_SUPPORTED]) def test_writer_fail_within_transaction_set_autocommit_false( self, test_driver: TestDriver, test_environment: TestEnvironment, props, conn_utils, aurora_utility, plugins): @@ -280,7 +285,7 @@ def test_writer_fail_within_transaction_set_autocommit_false( cursor_3.execute("DROP TABLE IF EXISTS test3_2") conn.commit() - @pytest.mark.parametrize("plugins", ["failover", "failover_v2"]) + @pytest.mark.parametrize("plugins", ["failover", "failover_v2", "gdb_failover"]) @enable_on_features([TestEnvironmentFeatures.FAILOVER_SUPPORTED]) def test_writer_fail_within_transaction_start_transaction( self, test_driver: TestDriver, test_environment: TestEnvironment, props, conn_utils, aurora_utility, @@ -326,7 +331,8 @@ def test_writer_fail_within_transaction_start_transaction( cursor_3.execute("DROP TABLE IF EXISTS test3_3") conn.commit() - @pytest.mark.parametrize("plugins", ["aurora_connection_tracker,failover", "aurora_connection_tracker,failover_v2"]) + @pytest.mark.parametrize("plugins", ["aurora_connection_tracker,failover", "aurora_connection_tracker,failover_v2", + "aurora_connection_tracker,gdb_failover"]) @enable_on_features([TestEnvironmentFeatures.FAILOVER_SUPPORTED]) @pytest.mark.repeat(5) # Run this test case a few more times since it is a flakey test def test_writer_failover_in_idle_connections( diff --git a/tests/unit/test_failover_v2_plugin.py b/tests/unit/test_failover_v2_plugin.py index e3614010..030bce70 100644 --- a/tests/unit/test_failover_v2_plugin.py +++ b/tests/unit/test_failover_v2_plugin.py @@ -216,6 +216,7 @@ def test_failover_writer_success(self, failover_v2_plugin): failover_v2_plugin._plugin_service.force_monitoring_refresh_host_list.return_value = True failover_v2_plugin._plugin_service.all_hosts = [writer_host] failover_v2_plugin._plugin_service.hosts = [writer_host] + failover_v2_plugin._plugin_service.get_host_info_by_strategy.return_value = writer_host failover_v2_plugin._plugin_service.connect.return_value = MagicMock() failover_v2_plugin._plugin_service.get_host_role.return_value = HostRole.WRITER failover_v2_plugin._throw_failover_success_exception = MagicMock(side_effect=FailoverSuccessError()) @@ -227,8 +228,10 @@ def test_failover_writer_success(self, failover_v2_plugin): def test_failover_writer_no_writer_found(self, failover_v2_plugin): reader_host = HostInfo("reader.com", 5432, HostRole.READER) + failover_v2_plugin._failover_timeout_sec = 0.05 failover_v2_plugin._plugin_service.force_monitoring_refresh_host_list.return_value = True failover_v2_plugin._plugin_service.all_hosts = [reader_host] + failover_v2_plugin._plugin_service.hosts = [reader_host] with pytest.raises(FailoverFailedError): failover_v2_plugin._failover_writer() @@ -241,9 +244,11 @@ def test_failover_writer_refresh_failed(self, failover_v2_plugin): def test_failover_writer_connection_failed(self, failover_v2_plugin): writer_host = HostInfo("new-writer.com", 5432, HostRole.WRITER) + failover_v2_plugin._failover_timeout_sec = 0.05 failover_v2_plugin._plugin_service.force_monitoring_refresh_host_list.return_value = True failover_v2_plugin._plugin_service.all_hosts = [writer_host] failover_v2_plugin._plugin_service.hosts = [writer_host] + failover_v2_plugin._plugin_service.get_host_info_by_strategy.return_value = writer_host failover_v2_plugin._plugin_service.connect.side_effect = Exception("Connection failed") with pytest.raises(FailoverFailedError): @@ -251,9 +256,11 @@ def test_failover_writer_connection_failed(self, failover_v2_plugin): def test_failover_writer_connected_to_reader(self, failover_v2_plugin): writer_host = HostInfo("new-writer.com", 5432, HostRole.WRITER) + failover_v2_plugin._failover_timeout_sec = 0.05 failover_v2_plugin._plugin_service.force_monitoring_refresh_host_list.return_value = True failover_v2_plugin._plugin_service.all_hosts = [writer_host] failover_v2_plugin._plugin_service.hosts = [writer_host] + failover_v2_plugin._plugin_service.get_host_info_by_strategy.return_value = writer_host failover_v2_plugin._plugin_service.connect.return_value = MagicMock() failover_v2_plugin._plugin_service.get_host_role.return_value = HostRole.READER