Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions percy/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,14 @@ def get_cache(cls, session_id, property):

@classmethod
def cleanup_cache(cls):
# Iterate a list copy so we can rewrite (or in future, delete) entries
# without `RuntimeError: dictionary changed size during iteration`.
now = time.time()
for session_id, session in cls.CACHE.items():
timestamp = session[cls.TIMEOUT_KEY]
for session_id, session in list(cls.CACHE.items()):
timestamp = session.get(cls.TIMEOUT_KEY)
if timestamp is None:
continue
if now - timestamp >= cls.CACHE_TIMEOUT:
cls.CACHE[session_id] = {
cls.session_details: session[cls.session_details]
cls.session_details: session.get(cls.session_details)
}
147 changes: 147 additions & 0 deletions percy/screenshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,146 @@ def fetch_percy_dom():
return response.text


def _get_origin(url):
"""Return scheme://host:port for a URL, or '' if it can't be parsed."""
if not url:
return ""
try:
parsed = urlparse(url)
if not parsed.scheme or not parsed.netloc:
return ""
return f"{parsed.scheme}://{parsed.netloc}"
except Exception: # pragma: no cover
return ""


def _same_origin(url, page_origin):
"""True when `url`'s origin (scheme + host + port) matches the page origin."""
if not page_origin:
return False
return _get_origin(url) == page_origin


def _walk_nodes(node, closed_pairs, page_origin=""):
"""Walk CDP DOM tree to find closed shadow roots.

Same-origin child frame documents share the parent's JS realm and the
`window.__percyClosedShadowRoots` WeakMap that PercyDOM.serialize reads,
so we recurse INTO them. Cross-origin frames live in a different realm
(their resolveNode objectIds wouldn't belong to our execution context),
so they're skipped. A contentDocument with no resolvable origin is also
skipped defensively."""
if "contentDocument" in node:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HIGH — Walker skips ALL iframes; selenium-dotnet #436 had the same initial bug and was fixed on CE re-review.

if "contentDocument" in node:
    return

Same-origin iframes share the parent JS realm and the window.__percyClosedShadowRoots WeakMap that PercyDOM.serialize reads. Closed shadow roots inside same-origin iframes SHOULD be captured. Cross-origin iframes should still be skipped (different JS realm; resolveNode objectIds wouldn't belong to our execution context anyway).

selenium-dotnet's fix (commit fix(snapshot): recurse closed shadow walk into same-origin iframes):

  • Same origin (GetOrigin(documentURL) == pageOrigin) → recurse INTO contentDocument.
  • Cross origin → skip.
  • Missing documentURL → defensive skip.

Suggested Python port:

if "contentDocument" in node:
    cd = node["contentDocument"]
    document_url = cd.get("documentURL")
    if not document_url:
        return
    if _same_origin(document_url, page_url):
        _walk_nodes(cd, closed_pairs)
    return

_same_origin should use urllib.parse.urlparse to compare scheme + host + port. Add tests for all three branches (same-origin captured, cross-origin skipped, missing URL skipped) — mirroring the dotnet test suite added in that PR.

content_doc = node["contentDocument"]
document_url = content_doc.get("documentURL")
if not document_url:
return
if not _same_origin(document_url, page_origin):
log(
"Skipping cross-origin frame document during"
f" closed-shadow walk: {document_url}",
lvl="debug"
)
return
# Same-origin frame: walk into the contentDocument as if it were any
# other subtree, then continue with this node's own children below.
_walk_nodes(content_doc, closed_pairs, page_origin)
if "shadowRoots" in node:
for sr in node["shadowRoots"]:
if sr.get("shadowRootType") == "closed":
closed_pairs.append({
"hostBackendNodeId": node["backendNodeId"],
"shadowBackendNodeId": sr["backendNodeId"]
})
_walk_nodes(sr, closed_pairs, page_origin)
if "children" in node:
for child in node["children"]:
_walk_nodes(child, closed_pairs, page_origin)


# pylint: disable=too-many-locals
def expose_closed_shadow_roots(page):
"""Use CDP to discover closed shadow roots and expose them to PercyDOM.serialize().
Closed shadow roots are inaccessible from JS (element.shadowRoot === null),
but CDP's DOM domain can pierce them."""
cdp_session = None
dom_enabled = False
try:
cdp_session = page.context.new_cdp_session(page)

cdp_session.send("DOM.enable")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HIGH — DOM.enable is called but DOM.disable is never invoked.

selenium-dotnet #436 and selenium-ruby #32 both pair enable/disable explicitly in their try/finally and gate disable on whether enable succeeded (so a failing enable doesn't emit a spurious disable).

cdp_session.detach() in the finally block does mostly release the domain in practice, but the explicit pairing is the documented pattern and matches every sibling SDK that uses CDP.

Suggested fix:

dom_enabled = False
try:
    cdp_session.send("DOM.enable")
    dom_enabled = True
    # ... existing walk + resolve + register ...
finally:
    if dom_enabled:
        try:
            cdp_session.send("DOM.disable")
        except Exception:
            pass
    try:
        cdp_session.detach()
    except Exception:
        pass

Also add a test mirroring dotnet's DOM.disable lifecycle suite: DOM.disable is sent after a successful run, sent after a mid-walk failure, NOT sent when DOM.enable itself raised.

dom_enabled = True
doc_result = cdp_session.send(
"DOM.getDocument", {"depth": -1, "pierce": True}
)
root = doc_result["root"]

# Compute the top-level page origin once so the walker can recurse
# into same-origin child frame documents but skip cross-origin ones.
page_origin = _get_origin(page.url)

closed_pairs = []
_walk_nodes(root, closed_pairs, page_origin)

if not closed_pairs:
return

log(
f"Found {len(closed_pairs)} closed shadow root(s),"
" exposing via CDP",
lvl="debug"
)

weakmap_script = (
"() => { window.__percyClosedShadowRoots ="
" window.__percyClosedShadowRoots || new WeakMap(); }"
)
page.evaluate(weakmap_script)

fn_decl = (
"function(shadowRoot) {"
" window.__percyClosedShadowRoots"
".set(this, shadowRoot); }"
)
for pair in closed_pairs:
host_id = pair["hostBackendNodeId"]
host_result = cdp_session.send(
"DOM.resolveNode", {"backendNodeId": host_id}
)
host_object_id = host_result["object"]["objectId"]

shadow_id = pair["shadowBackendNodeId"]
shadow_result = cdp_session.send(
"DOM.resolveNode", {"backendNodeId": shadow_id}
)
shadow_object_id = shadow_result["object"]["objectId"]

cdp_session.send("Runtime.callFunctionOn", {
"functionDeclaration": fn_decl,
"objectId": host_object_id,
"arguments": [{"objectId": shadow_object_id}]
})
except Exception as err:
log(
f"Could not expose closed shadow roots via CDP: {err}",
lvl="debug"
)
finally:
# Release the DOM domain so subsequent CDP commands don't keep
# emitting DOM events for this session. Only sent when DOM.enable
# succeeded — a failing enable must not emit a spurious disable.
if dom_enabled:
try:
cdp_session.send("DOM.disable")
except Exception: # pragma: no cover
pass
if cdp_session: # pragma: no branch
try:
cdp_session.detach()
except Exception: # pragma: no cover
pass


def process_frame(page, frame, options, percy_dom_script):
"""
Processes a single cross-origin frame to capture its snapshot and resources.
Expand Down Expand Up @@ -439,6 +579,9 @@ def capture_responsive_dom(page, cookies, percy_dom_script=None, config=None, **
if PERCY_RESPONSIVE_CAPTURE_RELOAD_PAGE:
page.reload()
page.evaluate(percy_dom_script)
# Re-prime the closed-shadow-root WeakMap — page.reload() creates a
# new document and erases window.__percyClosedShadowRoots.
expose_closed_shadow_roots(page)
page.evaluate("PercyDOM.waitForResize()")
resize_count = 0

Expand Down Expand Up @@ -492,6 +635,10 @@ def percy_snapshot(page, name, **kwargs):
# Inject the DOM serialization script
percy_dom_script = fetch_percy_dom()
page.evaluate(percy_dom_script)

# Expose closed shadow roots via CDP before serialization
expose_closed_shadow_roots(page)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MEDIUM — expose_closed_shadow_roots is called once but the WeakMap is wiped on responsive reloads.

Responsive captures reload the page at each width and re-evaluate percy_dom_script. The window.__percyClosedShadowRoots WeakMap is gone after page.reload(), so closed shadow roots will be missing in responsive DOMs at widths beyond the first.

selenium-ruby #32 explicitly re-primes after refresh in its responsive snapshot loop (lib/percy.rb:549). Search every page.evaluate(percy_dom_script) and page.reload() site in this file and precede each with expose_closed_shadow_roots(page).

Add a unit test that drives the responsive capture path through change_window_dimension_and_wait and asserts the WeakMap is re-populated at the second width.


cookies = page.context.cookies()

# Serialize and capture the DOM
Expand Down
9 changes: 9 additions & 0 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,12 @@ def test_cleanup_cache(self):
self.assertIn(self.session_id, self.cache.CACHE)
self.assertIn("session_details", self.cache.CACHE[self.session_id])
self.assertNotIn("key-1", self.cache.CACHE[self.session_id])

def test_cleanup_cache_skips_entry_missing_timeout_key(self):
orphan_id = "orphan_session"
self.cache.CACHE[orphan_id] = {Cache.session_details: {"hashed_id": "x"}}
self.cache.cleanup_cache()
self.assertEqual(
self.cache.CACHE[orphan_id], {Cache.session_details: {"hashed_id": "x"}}
)
del self.cache.CACHE[orphan_id]
Loading
Loading