diff --git a/Containerfile b/Containerfile index 8dcbf7b..6f037c0 100644 --- a/Containerfile +++ b/Containerfile @@ -1,9 +1,16 @@ FROM fedora:latest -RUN dnf install -y python3-ogr python3-copr python3-koji python3-pip fedpkg krb5-workstation && dnf clean all +RUN dnf install -y python3-ogr python3-copr python3-koji python3-pip fedpkg krb5-workstation openssh-clients && dnf clean all RUN pip3 install --upgrade sentry-sdk && pip3 check +# Configure SSH to not prompt for host key verification +RUN mkdir -p /root/.ssh && \ + echo "Host pkgs.fedoraproject.org" >> /root/.ssh/config && \ + echo " StrictHostKeyChecking accept-new" >> /root/.ssh/config && \ + echo " UserKnownHostsFile /dev/null" >> /root/.ssh/config && \ + chmod 600 /root/.ssh/config + RUN pip3 install git+https://github.com/packit/validation.git CMD ["validation"] diff --git a/src/validation/deployment.py b/src/validation/deployment.py index fddbfab..f4b398b 100644 --- a/src/validation/deployment.py +++ b/src/validation/deployment.py @@ -27,6 +27,7 @@ class DeploymentInfo: name: str app_name: str pr_comment: str + pr_comment_test: str pr_comment_vm_image_build: str opened_pr_trigger__packit_yaml_fix: Optional[YamlFix] copr_user: str @@ -39,6 +40,7 @@ class DeploymentInfo: name="prod", app_name="Packit-as-a-Service", pr_comment="/packit build", + pr_comment_test="/packit test", pr_comment_vm_image_build="/packit vm-image-build", opened_pr_trigger__packit_yaml_fix=None, copr_user="packit", @@ -50,6 +52,7 @@ class DeploymentInfo: name="stg", app_name="Packit-as-a-Service-stg", pr_comment="/packit-stg build", + pr_comment_test="/packit-stg test", pr_comment_vm_image_build="/packit-stg vm-image-build", opened_pr_trigger__packit_yaml_fix=YamlFix( from_str="---", diff --git a/src/validation/helpers.py b/src/validation/helpers.py index 4b96e09..254efa9 100644 --- a/src/validation/helpers.py +++ b/src/validation/helpers.py @@ -36,7 +36,15 @@ def sentry_sdk(): if sentry_secret := getenv("SENTRY_SECRET"): import sentry_sdk - sentry_sdk.init(sentry_secret) + from validation.deployment import DEPLOYMENT + + # Set environment based on deployment to distinguish in Sentry + environment = f"validation-{DEPLOYMENT.name}" + + sentry_sdk.init( + sentry_secret, + environment=environment, + ) return sentry_sdk logging.warning("SENTRY_SECRET was not set!") diff --git a/src/validation/testcase/base.py b/src/validation/testcase/base.py index 187ebea..c56ecd5 100644 --- a/src/validation/testcase/base.py +++ b/src/validation/testcase/base.py @@ -173,7 +173,14 @@ def trigger_build(self): msg = "Cannot post comment: PR is not set" raise ValueError(msg) - comment = self.comment or self.deployment.pr_comment + # For skip_build tests, use test comment instead of build comment + if self.comment: + comment = self.comment + elif self.pr and "skip" in self.pr.title.lower() and "build" in self.pr.title.lower(): + comment = self.deployment.pr_comment_test + logging.info("Using test comment for skip_build test: %s", comment) + else: + comment = self.deployment.pr_comment try: self.pr.comment(comment) except GithubAPIException as e: diff --git a/src/validation/testcase/gitlab.py b/src/validation/testcase/gitlab.py index e13080d..2602623 100644 --- a/src/validation/testcase/gitlab.py +++ b/src/validation/testcase/gitlab.py @@ -2,14 +2,16 @@ # # SPDX-License-Identifier: MIT +import asyncio import logging -from datetime import timedelta +from datetime import datetime, timedelta, timezone from gitlab import GitlabGetError from ogr.abstract import CommitFlag, CommitStatus from ogr.services.gitlab import GitlabProject from validation.testcase.base import Testcase +from validation.utils.trigger import Trigger class GitlabTestcase(Testcase): @@ -134,3 +136,93 @@ def create_empty_commit(self, branch: str, commit_msg: str) -> str: } commit = self.project.gitlab_repo.commits.create(data) return commit.id + + async def check_pending_check_runs(self): + """ + Override to add extended wait time for GitLab opened PR webhook delays. + GitLab webhook delivery can be delayed by over an hour during high load. + """ + # First, try the normal check with standard timeouts + initial_failure_msg = self.failure_msg + await super().check_pending_check_runs() + + # If this is an opened PR trigger and the initial check failed + if ( + self.trigger == Trigger.pr_opened + and self.failure_msg != initial_failure_msg + and "Commit statuses did not appear in time" in self.failure_msg + ): + logging.error( + "GitLab webhook delivery delayed - statuses did not appear within %d minutes. " + "This is a known issue with GitLab webhook queuing during high load. " + "Waiting an additional 60 minutes for delayed webhook delivery...", + self.CHECK_TIME_FOR_STATUSES_TO_APPEAR, + ) + + # Clear the failure message and wait longer + self.failure_msg = initial_failure_msg + + # Wait up to 60 more minutes for statuses to appear + watch_end = datetime.now(tz=timezone.utc) + timedelta(minutes=60) + all_statuses = self.get_statuses() + status_names = [self.get_status_name(status) for status in all_statuses] + + while len(status_names) == 0: + if datetime.now(tz=timezone.utc) > watch_end: + logging.error( + "GitLab webhook still not received after extended 60 minute wait. " + "Total wait time: %d minutes. " + "This indicates a significant GitLab webhook delay.", + self.CHECK_TIME_FOR_STATUSES_TO_APPEAR + 60, + ) + self.failure_msg += ( + f"Commit statuses did not appear even after extended wait " + f"({self.CHECK_TIME_FOR_STATUSES_TO_APPEAR + 60} minutes total).\n" + "Note: GitLab webhook delivery was significantly delayed.\n" + ) + return + await asyncio.sleep(30) + all_statuses = self.get_statuses() + status_names = [self.get_status_name(status) for status in all_statuses] + + logging.error( + "GitLab webhook eventually received after extended wait. " + "Statuses appeared, continuing with test validation. " + "This delay is expected for GitLab during high load periods.", + ) + + # Continue with phase 2: wait for statuses to be set to pending + logging.info( + "Watching pending statuses for commit %s", + self.head_commit, + ) + + await asyncio.sleep(5) + + watch_end = datetime.now(tz=timezone.utc) + timedelta( + minutes=self.CHECK_TIME_FOR_REACTION, + ) + + while True: + if datetime.now(tz=timezone.utc) > watch_end: + self.failure_msg += ( + f"Commit statuses were not set to pending in time " + f"({self.CHECK_TIME_FOR_REACTION} minutes).\n" + ) + return + + new_statuses = [ + status + for status in self.get_statuses() + if self.get_status_name(status) in status_names + ] + + for status in new_statuses: + if not self.is_status_completed(status): + logging.info( + "At least one commit status is now pending/running: %s", + self.get_status_name(status), + ) + return + + await asyncio.sleep(30) diff --git a/src/validation/testcase/pagure.py b/src/validation/testcase/pagure.py index c571c90..7a906f3 100644 --- a/src/validation/testcase/pagure.py +++ b/src/validation/testcase/pagure.py @@ -229,15 +229,34 @@ def _setup_git_repo(self): git_hostname = hostname ssh_url = f"ssh://{user}@{git_hostname}/forks/{user}/{self.project.namespace}/{self.project.repo}.git" - logging.debug("Changing fork remote to SSH: %s", ssh_url) + logging.debug("Configuring fork remote to SSH: %s", ssh_url) - subprocess.run( # noqa: S603 - ["git", "remote", "set-url", user, ssh_url], # noqa: S607 + # Check if remote exists, create if not + check_remote = subprocess.run( # noqa: S603 + ["git", "remote", "get-url", user], # noqa: S607 cwd=self._temp_dir, - check=True, capture_output=True, + check=False, ) - logging.debug("Fork remote configured with SSH URL") + + if check_remote.returncode == 0: + # Remote exists, update URL + subprocess.run( # noqa: S603 + ["git", "remote", "set-url", user, ssh_url], # noqa: S607 + cwd=self._temp_dir, + check=True, + capture_output=True, + ) + logging.debug("Updated existing fork remote") + else: + # Remote doesn't exist, create it + subprocess.run( # noqa: S603 + ["git", "remote", "add", user, ssh_url], # noqa: S607 + cwd=self._temp_dir, + check=True, + capture_output=True, + ) + logging.debug("Created new fork remote") # Configure git to use SSH key if provided ssh_key_path = os.getenv("PAGURE_SSH_KEY") diff --git a/src/validation/tests/github.py b/src/validation/tests/github.py index ef8b0ec..b8366b2 100644 --- a/src/validation/tests/github.py +++ b/src/validation/tests/github.py @@ -15,8 +15,8 @@ class GithubTests(Tests): # We need at least 100 requests per test, and we run multiple comment tests # So require at least sufficient quota min_required_rate_limit = 1200 - # Space out tests to avoid hitting rate limits - test_stagger_seconds = 60 + # Space out tests to avoid hitting rate limits and reduce load on packit-service + test_stagger_seconds = 180 def __init__(self): github_service = GithubService(token=getenv("GITHUB_TOKEN")) diff --git a/src/validation/tests/gitlab.py b/src/validation/tests/gitlab.py index 7f39aaa..bc238ab 100644 --- a/src/validation/tests/gitlab.py +++ b/src/validation/tests/gitlab.py @@ -17,9 +17,9 @@ class GitlabTests(Tests): # (new PR, push trigger, comment tests) # GitLab has more generous limits than GitHub (2000/min vs 5000/hour) min_required_rate_limit = 250 - # Stagger tests by 60 seconds to avoid race conditions in packit-service + # Stagger tests to avoid overloading packit-service instances # when multiple events arrive simultaneously for the same project - test_stagger_seconds = 60 + test_stagger_seconds = 180 def __init__( self,