From 0a96c62deedad8fda8e39ee05181314cd53d15cd Mon Sep 17 00:00:00 2001 From: karthikmudunuri <102793643+karthikmudunuri@users.noreply.github.com> Date: Sun, 31 May 2026 13:46:35 +0530 Subject: [PATCH] fix(pptx): carry verbatim custGeom OOXML in deck JSON for cross-process serialize --- .changeset/custgeom-verbatim-roundtrip.md | 9 +++ .../src/lib/pptx/__tests__/roundtrip.test.ts | 38 ++++++++++ .../lib/pptx/__tests__/synth-writers.test.ts | 72 ++++++++++++++++++- packages/slidewise/src/lib/pptx/deckToPptx.ts | 31 ++++++++ packages/slidewise/src/lib/pptx/pptxToDeck.ts | 19 +++++ packages/slidewise/src/lib/types.ts | 15 ++++ 6 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 .changeset/custgeom-verbatim-roundtrip.md diff --git a/.changeset/custgeom-verbatim-roundtrip.md b/.changeset/custgeom-verbatim-roundtrip.md new file mode 100644 index 0000000..d39bef6 --- /dev/null +++ b/.changeset/custgeom-verbatim-roundtrip.md @@ -0,0 +1,9 @@ +--- +"@textcortex/slidewise": patch +--- + +fix(pptx): carry verbatim custGeom OOXML in the deck JSON so vector shapes survive cross-process serialize + +The high-fidelity replay of imported elements relies on two **module-global** registries (`sourceBufferCache`, `elementSourceRegistry`) populated only by `parsePptx` and never written to the deck JSON. In a pipeline that parses in one process and serializes in another (parse client-side → store deck JSON → serialize server-side), those registries are empty, so every element is re-synthesised from its deck fields. Synthesis can't represent OOXML even-odd / winding, so complex `custGeom` vectors (e.g. a bicycle diagram) render blank even though simpler ones (the brand logo) happen to survive. + +The importer now stamps the verbatim `<p:sp>` of **self-contained** custGeom shapes (no `r:embed` / `r:id` / `a:schemeClr` references) onto the element as `pristineOoxml = { xml, snapshot }`, which rides along in the deck JSON. 
On serialize, an unedited such shape (snapshot still matches) is replayed verbatim — exact source geometry and winding — instead of being re-synthesised; its `cNvPr/@id` is rewritten to avoid spTree collisions. Edited shapes fall back to synthesis. This is the same persist-in-JSON pattern already used for embedded fonts (`deck.fonts`), scoped to vector shapes to keep JSON bloat negligible (~a few KB per deck). diff --git a/packages/slidewise/src/lib/pptx/__tests__/roundtrip.test.ts b/packages/slidewise/src/lib/pptx/__tests__/roundtrip.test.ts index 383a767..c4ada28 100644 --- a/packages/slidewise/src/lib/pptx/__tests__/roundtrip.test.ts +++ b/packages/slidewise/src/lib/pptx/__tests__/roundtrip.test.ts @@ -247,6 +247,44 @@ describe("pptx round-trip", () => { ).toBe(true); }); + it("stamps verbatim pristineOoxml on imported self-contained custGeom shapes", async () => { + // Round a custGeom shape (solid fill, no scheme/rId refs) through a real + // serialize→parse. The importer should carry its verbatim <p:sp> in the + // deck JSON so a later cross-process serialize can replay it. + const deck = makeDeck([ + { + ...baseElement, + id: "logo", + type: "shape", + x: 100, + y: 100, + w: 400, + h: 300, + shape: "rect", + fill: "#EA1B0A", + path: { + d: "M 0 0 L 100 0 L 100 100 L 0 100 Z", + viewW: 100, + viewH: 100, + fillRule: "evenodd", + }, + }, + ]); + + const out = await roundtrip(deck); + const shape = out.slides[0].elements.find( + (e) => e.type === "shape" && e.path + ); + expect(shape && shape.type === "shape").toBe(true); + if (!shape || shape.type !== "shape") return; + expect(shape.pristineOoxml).toBeTruthy(); + expect(shape.pristineOoxml?.xml).toContain(""); + // The stored snapshot matches the element as imported (so the serializer + // treats it as pristine until edited). 
+ expect(typeof shape.pristineOoxml?.snapshot).toBe("string"); + expect(shape.pristineOoxml?.snapshot.length).toBeGreaterThan(0); + }); + it("preserves slide background colour", async () => { const deck: Deck = { version: CURRENT_DECK_VERSION, diff --git a/packages/slidewise/src/lib/pptx/__tests__/synth-writers.test.ts b/packages/slidewise/src/lib/pptx/__tests__/synth-writers.test.ts index c5d9fc9..44dfe39 100644 --- a/packages/slidewise/src/lib/pptx/__tests__/synth-writers.test.ts +++ b/packages/slidewise/src/lib/pptx/__tests__/synth-writers.test.ts @@ -1,8 +1,9 @@ import { describe, it, expect } from "vitest"; import JSZip from "jszip"; import { serializeDeck } from "../index"; +import { snapshotElement } from "../pptxToDeck"; import { CURRENT_DECK_VERSION } from "@/lib/schema/migrate"; -import type { Deck } from "@/lib/types"; +import type { Deck, ShapeElement } from "@/lib/types"; /** * Tests for the synth-OOXML writers added in the full-fidelity export work: @@ -102,6 +103,75 @@ describe("synth writers (PRs 1, 2, 3, 4, 5, 6, 7)", () => { expect(slide).not.toMatch(/ { + const shape: ShapeElement = { + ...base, + id: "bikeXYZ", + type: "shape", + shape: "rect", + x: 100, + y: 100, + w: 400, + h: 300, + fill: "#EA1B0A", + path: { + d: "M 0 0 L 100 0 L 100 100 Z", + viewW: 100, + viewH: 100, + fillRule: "evenodd", + }, + }; + // Self-contained verbatim with a colliding low cNvPr id + a marker. + const verbatim = + `` + + `` + + `` + + `` + + `` + + `` + + ``; + shape.pristineOoxml = { xml: verbatim, snapshot: snapshotElement(shape) }; + + const zip = await generate( + makeDeck([{ id: "s", background: "#FFFFFF", elements: [shape] }]) + ); + const slide = await zip.file("ppt/slides/slide1.xml")?.async("string"); + // The exact source geometry was replayed (marker + source fill present)… + expect(slide).toContain('data-marker="VERBATIM_BIKE"'); + expect(slide).toContain(''); + // …and the colliding cNvPr id="7" was rewritten to a fresh high id. 
+ expect(slide).not.toMatch(/]*\bid="7"/); + }); + + it("falls back to synthesis when a pristine-OOXML shape was edited", async () => { + const shape: ShapeElement = { + ...base, + id: "bikeEdited", + type: "shape", + shape: "rect", + x: 100, + y: 100, + w: 400, + h: 300, + fill: "#EA1B0A", + path: { d: "M 0 0 L 100 0 L 100 100 Z", viewW: 100, viewH: 100 }, + }; + const verbatim = + `` + + ``; + shape.pristineOoxml = { xml: verbatim, snapshot: snapshotElement(shape) }; + // Edit the shape AFTER the snapshot was taken → snapshot diverges. + shape.x = 999; + + const zip = await generate( + makeDeck([{ id: "s", background: "#FFFFFF", elements: [shape] }]) + ); + const slide = await zip.file("ppt/slides/slide1.xml")?.async("string"); + // Verbatim replay is rejected; the synth path emits the custGeom from path.d. + expect(slide).not.toContain("VERBATIM_BIKE"); + expect(slide).toContain(""); + }); + it("PR 2: emits for shapes with linear-gradient fill", async () => { const deck = makeDeck([ { diff --git a/packages/slidewise/src/lib/pptx/deckToPptx.ts b/packages/slidewise/src/lib/pptx/deckToPptx.ts index 388477d..2528f7e 100644 --- a/packages/slidewise/src/lib/pptx/deckToPptx.ts +++ b/packages/slidewise/src/lib/pptx/deckToPptx.ts @@ -32,6 +32,7 @@ import { synthesiseEmbeddedFonts, effectLstXml, parseFill, + freshNvId, RID_MARKER_RE, slidewiseShapeName, type MediaPayload, @@ -167,6 +168,16 @@ function shouldSynthesise(el: SlideElement): boolean { function synthesiseInto(synth: SynthSlideEntry, el: SlideElement): void { if (el.type === "shape") { + // Cross-process replay: an unedited custGeom shape carries its verbatim + // source `` in the deck JSON (see `stampPristineOoxml`). Replaying + // it preserves the exact source winding/geometry that synthesis can't — + // this is what un-blanks complex vectors when the import-time source + // registry isn't available (parse + serialize in different processes). 
+ const verbatim = pristineShapeXml(el); + if (verbatim) { + synth.shapeXml.push(verbatim); + return; + } const out = synthesiseShape(el); synth.shapeXml.push(out.xml); for (const m of out.media) synth.media.push(m); @@ -187,6 +198,26 @@ function synthesiseInto(synth: SynthSlideEntry, el: SlideElement): void { } } +/** + * Verbatim `<p:sp>` for a custGeom shape that carries deck-JSON-persisted + * source OOXML and hasn't been edited. Returns null when there's no pristine + * XML or the element diverged from its import snapshot (then the caller + * synthesises from `path.d`). The source `cNvPr/@id` is rewritten to a fresh + * high id so it can't collide with whatever pptxgenjs allocated in the spTree. + * + * NB: same-process serialize never reaches here for these shapes — they're + * caught earlier by `isPristineImportedElement` (registry hit) and replayed + * through the source archive. This path is the cross-process fallback. + */ +function pristineShapeXml(el: SlideElement): string | null { + if (el.type !== "shape" || !el.pristineOoxml) return null; + if (snapshotElement(el) !== el.pristineOoxml.snapshot) return null; + return el.pristineOoxml.xml.replace( + /(]*\bid=")\d+(")/, + `$1${freshNvId()}$2` + ); +} + /** * Render a single child for ``. We only synthesise shapes/charts * inside groups for v1 — text / image / line children remain renderable diff --git a/packages/slidewise/src/lib/pptx/pptxToDeck.ts b/packages/slidewise/src/lib/pptx/pptxToDeck.ts index 4c9bcaf..a63e09e 100644 --- a/packages/slidewise/src/lib/pptx/pptxToDeck.ts +++ b/packages/slidewise/src/lib/pptx/pptxToDeck.ts @@ -480,6 +480,25 @@ function registerElementSource( snapshot: snapshotElement(element), slidePath, }); + stampPristineOoxml(element, rawXml); +} + +/** + * For a self-contained custGeom (vector) shape, copy its verbatim `<p:sp>` + * source XML onto the element so it survives JSON serialization (the + * `elementSourceRegistry` above is module-global and lost across processes). 
+ * A serialize in a fresh process can then replay the exact source geometry + * instead of re-synthesising from `path.d` — synthesis can't represent OOXML + * even-odd winding, which is what blanks complex vectors like the eon bicycle. + * + * Restricted to shapes whose XML carries no external references + * (`r:embed` / `r:id` / `r:link` images, `a:schemeClr` theme colours) so the + * fragment stays valid without the source archive or its theme. + */ +function stampPristineOoxml(element: SlideElement, rawXml: string): void { + if (element.type !== "shape" || !element.path) return; + if (/\br:(embed|id|link)=|` OOXML captured at import for a self-contained custGeom + * (vector) shape, carried *in the deck JSON* so a serialize running in a + * different process from the import (parse client-side → store JSON → + * serialize server-side) can replay the exact source geometry rather than + * re-synthesising from `path.d`. Synthesis can't express OOXML even-odd / + * winding faithfully, so complex vectors blank when the process-global + * source registry isn't available. Only populated for shapes whose source + * XML has no external references (`r:embed` / `r:id` / `a:schemeClr`), so it + * stays valid without the source archive or theme. `snapshot` is the element + * snapshot at import; the serializer replays the XML only while the element + * is unedited (snapshot still matches), otherwise it falls back to synthesis. + * Host-opaque — do not author by hand. + */ + pristineOoxml?: { xml: string; snapshot: string }; } export interface ShapePath {