diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/01.jpg new file mode 100644 index 000000000..24da61dbd Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/02.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/02.jpg new file mode 100644 index 000000000..afb97c724 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/02.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/03.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/03.jpg new file mode 100644 index 000000000..71b94c340 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/03.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/04.jpg new file mode 100644 index 000000000..625f90002 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/04.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/05.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/05.jpg new file mode 100644 index 000000000..761172068 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/airport-security-scans/05.jpg differ diff --git 
a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/01.jpg new file mode 100644 index 000000000..44fcae4dd Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/02.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/02.jpg new file mode 100644 index 000000000..93196b252 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/02.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/03.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/03.jpg new file mode 100644 index 000000000..7c2ffba71 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/03.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/04.jpg new file mode 100644 index 000000000..73c1a45f0 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/04.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/05.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/05.jpg new file mode 100644 index 000000000..b05ffe269 Binary files /dev/null and 
b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/crop-disease-detection-images/05.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/01.jpg new file mode 100644 index 000000000..d1f5a1822 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/02.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/02.jpg new file mode 100644 index 000000000..ca16ecd0e Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/02.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/03.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/03.jpg new file mode 100644 index 000000000..3e582fd06 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/03.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/04.jpg new file mode 100644 index 000000000..106d976c7 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/04.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/05.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/05.jpg new file mode 100644 index 000000000..ffa729b57 Binary files /dev/null and 
b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/drone-aerial-inspection/05.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/01.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/01.png new file mode 100644 index 000000000..a9035e252 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/01.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/02.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/02.png new file mode 100644 index 000000000..8b73d15f3 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/02.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/03.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/03.png new file mode 100644 index 000000000..55ff6d4c7 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/03.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/04.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/04.png new file mode 100644 index 000000000..fd64223a4 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/04.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/05.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/05.png new file mode 100644 index 000000000..511b46c0e Binary files /dev/null and 
b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/funny-pet-image-edits/05.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/01.jpg new file mode 100644 index 000000000..af8572014 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/02.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/02.jpg new file mode 100644 index 000000000..2df280dd4 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/02.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/03.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/03.jpg new file mode 100644 index 000000000..b65d2de00 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/03.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/04.jpg new file mode 100644 index 000000000..6df15a306 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/04.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/05.jpg 
b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/05.jpg new file mode 100644 index 000000000..0f8bd3f2e Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/humanoid-robot-scene-understanding/05.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/01.jpg new file mode 100644 index 000000000..26eef7533 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/02.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/02.jpg new file mode 100644 index 000000000..a643583a4 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/02.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/03.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/03.jpg new file mode 100644 index 000000000..9089f8faf Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/03.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/04.jpg new file mode 100644 index 000000000..1ffcc2545 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/04.jpg differ diff --git 
a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/05.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/05.jpg new file mode 100644 index 000000000..11c9766a8 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/medical-extremity-xrays/05.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/01.jpg new file mode 100644 index 000000000..f0cecdabf Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/02.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/02.png new file mode 100644 index 000000000..8171e4b18 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/02.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/03.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/03.png new file mode 100644 index 000000000..e155e9054 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/03.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/04.jpg new file mode 100644 index 000000000..8e7f6ca58 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/04.jpg differ diff --git 
a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/05.png b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/05.png new file mode 100644 index 000000000..80f231140 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/product-image-variations/05.png differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/01.jpg new file mode 100644 index 000000000..5a9f436fb Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/02.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/02.jpg new file mode 100644 index 000000000..29d3fe2cc Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/02.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/03.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/03.jpg new file mode 100644 index 000000000..fe4960bd7 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/03.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/04.jpg new file mode 100644 index 000000000..cb84286b7 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/04.jpg differ diff --git 
a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/05.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/05.jpg new file mode 100644 index 000000000..6c9e7361d Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/rich-document-images/05.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/01.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/01.jpg new file mode 100644 index 000000000..1928eb51f Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/01.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/02.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/02.jpg new file mode 100644 index 000000000..71337cfbe Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/02.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/03.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/03.jpg new file mode 100644 index 000000000..fe817ac28 Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/03.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/04.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/04.jpg new file mode 100644 index 000000000..321e86ffc Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/04.jpg differ diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/05.jpg 
b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/05.jpg
new file mode 100644
index 000000000..63f1de222
Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/examples/traffic-scenarios/05.jpg differ
diff --git a/fern/assets/image-generation-for-multimodal-data-pipelines/image-generation-hero.jpg b/fern/assets/image-generation-for-multimodal-data-pipelines/image-generation-hero.jpg
new file mode 100644
index 000000000..a3a0bb633
Binary files /dev/null and b/fern/assets/image-generation-for-multimodal-data-pipelines/image-generation-hero.jpg differ
diff --git a/fern/assets/recipes/image_generation/agriculture_crop_imagery.py b/fern/assets/recipes/image_generation/agriculture_crop_imagery.py
new file mode 100644
index 000000000..44c333b33
--- /dev/null
+++ b/fern/assets/recipes/image_generation/agriculture_crop_imagery.py
@@ -0,0 +1,307 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "data-designer",
+# ]
+# ///
+"""Agriculture Crop Disease Detection Image Recipe
+
+Generate synthetic crop disease detection images with controlled variation over
+crop type, growth stage, viewpoint, disease or confounding condition, severity,
+weather, irrigation, and field condition. The objective is to create examples
+where the expected crop-health label is known, including healthy negatives and
+hard confounders, so teams can evaluate detection prompts, build labeling
+rubrics, calibrate reviewers, and prototype crop-disease workflows before using
+governed field imagery.
+
+Prerequisites:
+    - An image-generation provider key for the selected model. The defaults use
+      OpenRouter, so set OPENROUTER_API_KEY before running.
+
+Run:
+    uv run agriculture_crop_imagery.py --num-records 10
+"""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import data_designer.config as dd
+from data_designer.interface import DataDesigner, DatasetCreationResults
+
+DEFAULT_MODEL_PROVIDER = "openrouter"
+DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview"
+DEFAULT_MODEL_ALIAS = "agriculture-image-model"
+
+
+def build_model_configs(
+    *,
+    model_provider: str,
+    model_id: str,
+    model_alias: str,
+    image_size: str,
+    aspect_ratio: str,
+    max_parallel_requests: int,
+) -> list[dd.ModelConfig]:
+    """Build the single image-generation model config used by this recipe.
+
+    ``aspect_ratio`` and ``image_size`` are forwarded verbatim inside
+    ``extra_body``; their accepted values are provider-specific. The health
+    check is skipped (``skip_health_check=True``).
+    """
+    return [
+        dd.ModelConfig(
+            alias=model_alias,
+            model=model_id,
+            provider=model_provider,
+            inference_parameters=dd.ImageInferenceParams(
+                extra_body={
+                    "n": 1,
+                    "generationConfig": {
+                        "imageConfig": {
+                            "aspectRatio": aspect_ratio,
+                            "imageSize": image_size,
+                        }
+                    },
+                },
+                max_parallel_requests=max_parallel_requests,
+            ),
+            skip_health_check=True,
+        )
+    ]
+
+
+def add_category(config_builder: dd.DataDesignerConfigBuilder, name: str, values: list[str]) -> None:
+    """Add a categorical sampler column named ``name`` drawing from ``values``."""
+    config_builder.add_column(
+        dd.SamplerColumnConfig(
+            name=name,
+            sampler_type=dd.SamplerType.CATEGORY,
+            params=dd.CategorySamplerParams(values=values),
+        )
+    )
+
+
+def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None:
+    """Add a unique row-level key that discourages duplicate image generations."""
+    config_builder.add_column(
+        dd.SamplerColumnConfig(
+            name="visual_variation_id",
+            sampler_type=dd.SamplerType.UUID,
+            params=dd.UUIDSamplerParams(prefix="crop-", short_form=True),
+        )
+    )
+
+
+def build_config(
+    *,
+    model_provider: str = DEFAULT_MODEL_PROVIDER,
+    model_id: str = DEFAULT_MODEL_ID,
+    model_alias: str = DEFAULT_MODEL_ALIAS,
+    image_size: str = "1024",
+    aspect_ratio: str = "4:3",
+    max_parallel_requests: int = 10,
+) -> dd.DataDesignerConfigBuilder:
+    """Build the crop disease detection image-generation pipeline.
+
+    Registers the visual-variation UUID column, seven categorical sampler
+    columns, and the ``crop_image`` image column whose prompt is
+    ``AGRICULTURE_IMAGE_PROMPT``.
+    """
+    model_configs = build_model_configs(
+        model_provider=model_provider,
+        model_id=model_id,
+        model_alias=model_alias,
+        image_size=image_size,
+        aspect_ratio=aspect_ratio,
+        max_parallel_requests=max_parallel_requests,
+    )
+    config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs)
+    add_visual_variation_id(config_builder)
+
+    add_category(
+        config_builder,
+        "crop_type",
+        [
+            "corn",
+            "soybean",
+            "wheat",
+            "rice",
+            "tomato",
+            "grape vineyard",
+            "apple orchard",
+            "lettuce",
+            "potato",
+            "strawberry",
+        ],
+    )
+    add_category(
+        config_builder,
+        "growth_stage",
+        [
+            "seedling",
+            "vegetative growth",
+            "flowering",
+            "fruiting",
+            "grain fill",
+            "near harvest",
+        ],
+    )
+    add_category(
+        config_builder,
+        "viewpoint",
+        [
+            "close-up leaf-level scouting photo",
+            "row-level field photo",
+            "drone oblique field view",
+            "top-down drone crop-row view",
+            "greenhouse bench view",
+            "orchard row view",
+        ],
+    )
+    add_category(
+        config_builder,
+        "disease_or_condition",
+        [
+            # Healthy negatives: the module docstring and the prompt both rely on them.
+            "healthy crop with no visible disease symptoms",
+            "powdery mildew on leaves",
+            "rust-colored fungal pustules on leaf surfaces",
+            "early blight with concentric brown leaf spots",
+            "late blight with irregular dark lesions",
+            "bacterial leaf spot with small dark speckles",
+            "downy mildew patches on leaf undersides",
+            "leaf curl with mosaic discoloration",
+            "insect feeding damage as a disease confounder",
+            "nutrient deficiency yellowing as a disease confounder",
+        ],
+    )
+    add_category(
+        config_builder,
+        "severity",
+        [
+            "low severity affecting isolated plants",
+            "moderate severity affecting patches",
+            "high severity affecting large field sections",
+        ],
+    )
+    add_category(
+        config_builder,
+        "field_condition",
+        [
+            "uniform crop stand",
+            "patchy emergence",
+            "uneven row spacing",
+            "visible irrigation lines",
+            "muddy soil after rain",
+            "dry cracked soil",
+            "mulched bed system",
+        ],
+    )
+    add_category(
+        config_builder,
+        "weather_lighting",
+        [
+            "bright midday sun",
+            "soft overcast light",
+            "golden hour light",
+            "after-rain humid conditions",
+            "hazy smoky sky",
+            "greenhouse diffuse lighting",
+        ],
+    )
+
+    config_builder.add_column(
+        dd.ImageColumnConfig(
+            name="crop_image",
+            prompt=AGRICULTURE_IMAGE_PROMPT,
+            model_alias=model_alias,
+        )
+    )
+
+    return config_builder
+
+
+def create_dataset(
+    config_builder: dd.DataDesignerConfigBuilder,
+    *,
+    num_records: int,
+    dataset_name: str,
+    artifact_path: Path | str | None = None,
+) -> DatasetCreationResults:
+    """Validate ``config_builder`` and then create ``num_records`` rows as ``dataset_name``."""
+    data_designer = DataDesigner(artifact_path=artifact_path)
+    data_designer.validate(config_builder)
+    return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name)
+
+
+AGRICULTURE_IMAGE_PROMPT = """\
+Create a realistic crop disease detection image.
+
+Scene requirements:
+- Visual variation ID, for internal diversity only: {{ visual_variation_id }}
+- Crop type: {{ crop_type }}
+- Growth stage: {{ growth_stage }}
+- Viewpoint: {{ viewpoint }}
+- Disease or condition: {{ disease_or_condition }}
+- Severity: {{ severity }}
+- Field condition: {{ field_condition }}
+- Weather and lighting: {{ weather_lighting }}
+
+Make the image useful for crop disease detection, visual QA, reviewer
+calibration, and data-labeling experiments. The requested crop, condition,
+severity, and field context should be visually inspectable. Show realistic
+plant structure, leaves, rows, soil, and disease symptoms when requested. For
+healthy examples, ignore the severity value and show clear healthy leaves or
+canopy with no visible disease.
+For confounders, make the non-disease condition plausible enough to test a
+classifier or VLM prompt. Do not include real farm names, readable license
+plates, watermarks, or people as the primary subject. Generate exactly one
+final crop image for this row. Do not return alternate versions, a grid, a pair
+of examples, before/after panels, or multiple frames. Use the visual variation
+ID only as an internal diversity key; never render it as text.
+"""
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the recipe's command-line options."""
+    parser = argparse.ArgumentParser(description="Generate synthetic crop disease detection imagery.")
+    parser.add_argument("--num-records", type=int, default=10, help="Number of crop images to generate.")
+    parser.add_argument("--dataset-name", default="crop-disease-detection-images", help="Output dataset name.")
+    parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.")
+    parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.")
+    parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.")
+    parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.")
+    parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.")
+    parser.add_argument("--aspect-ratio", default="4:3", help="Provider-specific aspect ratio value.")
+    parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.")
+    return parser.parse_args()
+
+
+def main() -> None:
+    """Build the config from CLI arguments, generate the dataset, and print a summary."""
+    args = parse_args()
+    config_builder = build_config(
+        model_provider=args.model_provider,
+        model_id=args.model_id,
+        model_alias=args.model_alias,
+        image_size=args.image_size,
+        aspect_ratio=args.aspect_ratio,
+        max_parallel_requests=args.max_parallel_requests,
+    )
+    results = create_dataset(
+        config_builder,
+        num_records=args.num_records,
+        dataset_name=args.dataset_name,
+        artifact_path=args.artifact_path,
+    )
+    dataset = results.load_dataset()
+    print(f"Generated {len(dataset)} crop disease detection image rows.")
+    print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fern/assets/recipes/image_generation/airport_security_scans.py b/fern/assets/recipes/image_generation/airport_security_scans.py
new file mode 100644
index 000000000..93948893d
--- /dev/null
+++ b/fern/assets/recipes/image_generation/airport_security_scans.py @@ -0,0 +1,299 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Airport Baggage Screening Image Generation Recipe + +Generate synthetic airport baggage-screening style images with controlled +variation over scanner style, bag density, benign clutter, material mix, object +overlap, and high-level defensive threat-type labels. + +Security note: + This recipe is intended for defensive model development, evaluation, + curriculum data, and human-review tooling. Do not use it to plan, optimize, + or describe ways to bypass real screening systems. The prompts avoid + operational bypass details and use high-level threat types rather than + concealment instructions. + +Prerequisites: + - An image-generation provider key for the selected model. The defaults use + OpenRouter, so set OPENROUTER_API_KEY before running. 
+ +Run: + uv run airport_security_scans.py --num-records 10 +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "baggage-screening-image-model" + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + """Build a provider-agnostic image-generation model config.""" + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def add_category(config_builder: dd.DataDesignerConfigBuilder, name: str, values: list[str]) -> None: + """Add a categorical sampler column.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values), + ) + ) + + +def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None: + """Add a unique row-level key that discourages duplicate image generations.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name="visual_variation_id", + sampler_type=dd.SamplerType.UUID, + params=dd.UUIDSamplerParams(prefix="scan-", short_form=True), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str = "4:3", + max_parallel_requests: int = 10, +) -> 
dd.DataDesignerConfigBuilder: + """Build an airport baggage-screening image-generation pipeline.""" + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + add_visual_variation_id(config_builder) + + add_category( + config_builder, + "scanner_style", + [ + "dual-energy X-ray baggage scan with pseudo-color material mapping", + "computed tomography baggage scan slice rendered as pseudo-color X-ray", + "top-down carry-on baggage screening view", + "side-view checked-bag screening image", + ], + ) + + add_category( + config_builder, + "bag_type", + [ + "small carry-on roller bag", + "soft backpack", + "messenger bag", + "hard-shell suitcase", + "duffel bag", + "camera equipment case", + ], + ) + + add_category( + config_builder, + "bag_density", + [ + "sparse packing with many empty regions", + "moderate packing density", + "dense packing with overlapping objects", + "very dense packing with cluttered object boundaries", + ], + ) + + add_category( + config_builder, + "benign_contents", + [ + "clothing, shoes, toiletries, and paperback books", + "laptop, chargers, headphones, notebooks, and snacks", + "camera body, lenses, batteries, cables, and clothing", + "children's toys, folded clothing, tablet, and water bottle", + "sports gear, towel, shoes, and plastic accessories", + "business travel items, documents, laptop, and power adapters", + ], + ) + + add_category( + config_builder, + "material_mix", + [ + "mostly fabric and plastic with a few small metal objects", + "electronics-heavy bag with cables and batteries", + "mixed organic, plastic, and metal materials", + "mostly low-density organic material with scattered dense regions", + "many small overlapping metal and plastic objects", + ], + ) + + add_category( + 
config_builder, + "threat_type", + [ + "none - clear benign bag with no threat-like visual pattern", + "dense electronics cluster requiring secondary review", + "oversized liquid-container-like region requiring secondary review", + "sharp-object-like silhouette requiring secondary review", + "unknown dense object requiring secondary review", + "clutter and overlapping objects preventing confident clearance", + "organic anomaly requiring secondary review", + "ambiguous tool-like silhouette requiring secondary review", + ], + ) + + add_category( + config_builder, + "image_quality", + [ + "clean scanner output with crisp object boundaries", + "slightly noisy scanner output", + "low-contrast scan with compressed dynamic range", + "scan with mild motion blur", + "scan with color palette shifted toward orange and blue material classes", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="baggage_scan", + prompt=AIRPORT_SECURITY_SCAN_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +AIRPORT_SECURITY_SCAN_PROMPT = """\ +Create a synthetic airport baggage-screening training image that shows only the scan content. 
+ +Image requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Scanner style: {{ scanner_style }} +- Bag type: {{ bag_type }} +- Bag density: {{ bag_density }} +- Benign contents: {{ benign_contents }} +- Material mix: {{ material_mix }} +- Threat type metadata target, not text to render: {{ threat_type }} +- Image quality: {{ image_quality }} + +Render the image as a realistic pseudo-color baggage scan, not a normal photo. +Show overlapping objects, material-color variation, partial occlusion, and +scanner-like attenuation. The image should be useful for defensive model +development and human-review training. + +Generate exactly one final scan image for this row. Do not return alternate +versions, a grid, a pair of examples, a before/after image, multiple scans, or +multiple panels. Use the visual variation ID only as an internal diversity key +for object placement, scanner angle, and material pattern; never render it as +text. + +The output must be the scan image only. Do not add labels, legends, captions, +classification text, bounding boxes, arrows, callouts, segmentation overlays, +heatmaps, UI panels, scanner controls, watermarks, timestamps, filenames, row +IDs, colored outlines, or any additional layer of text. Do not include +operational airport details, real airport names, passenger names, barcodes, +boarding passes, bypass instructions, or anything that describes how to hide or +evade detection. Use the threat type only to shape the broad visual contents of +the bag scan. 
+""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate synthetic airport baggage-screening images.") + parser.add_argument("--num-records", type=int, default=10, help="Number of baggage scan images to generate.") + parser.add_argument("--dataset-name", default="synthetic-baggage-screening-scans", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="4:3", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + dataset = results.load_dataset() + print(f"Generated {len(dataset)} synthetic baggage-screening rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/image_generation/drone_aerial_inspection.py b/fern/assets/recipes/image_generation/drone_aerial_inspection.py new file mode 100644 index 
000000000..41e0ab1c3 --- /dev/null +++ b/fern/assets/recipes/image_generation/drone_aerial_inspection.py @@ -0,0 +1,308 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Drone Aerial Inspection Image Generation Recipe + +Generate synthetic low-altitude drone inspection images with controlled +variation over site type, inspection target, altitude, camera angle, defect or +event, severity, occlusion, and weather and lighting conditions. + +The recipe is intended for infrastructure inspection, property review, +construction monitoring, disaster-response review, visual QA, reviewer +calibration, and VLM evaluation. It avoids surveillance, military targeting, +evasion, or sensitive-facility prompts. + +Prerequisites: + - An image-generation provider key for the selected model. The defaults use + OpenRouter, so set OPENROUTER_API_KEY before running. 
+ +Run: + uv run drone_aerial_inspection.py --num-records 10 +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "drone-aerial-inspection-model" + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def add_category(config_builder: dd.DataDesignerConfigBuilder, name: str, values: list[str]) -> None: + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values), + ) + ) + + +def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None: + """Add a unique row-level key that discourages duplicate image generations.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name="visual_variation_id", + sampler_type=dd.SamplerType.UUID, + params=dd.UUIDSamplerParams(prefix="drone-", short_form=True), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str = "16:9", + max_parallel_requests: int = 10, +) -> dd.DataDesignerConfigBuilder: + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + 
model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + add_visual_variation_id(config_builder) + + add_category( + config_builder, + "site_type", + [ + "residential roof and yard", + "commercial flat roof", + "bridge deck and support structure", + "rail corridor and track bed", + "solar farm rows", + "wind turbine tower and blades", + "construction site", + "roadway and drainage culvert", + "utility pipeline corridor", + "storm-affected neighborhood street", + ], + ) + add_category( + config_builder, + "inspection_target", + [ + "roof covering condition", + "surface cracking", + "standing water", + "vegetation encroachment", + "panel or blade damage", + "debris blocking access", + "material staging progress", + "erosion around infrastructure", + "storm or hail damage", + "construction progress milestone", + ], + ) + add_category( + config_builder, + "altitude", + [ + "very low drone pass, about 10 meters above the target", + "low drone pass, about 25 meters above the target", + "medium drone pass, about 60 meters above the target", + "higher overview pass, about 100 meters above the target", + ], + ) + add_category( + config_builder, + "camera_angle", + [ + "straight-down nadir view", + "oblique 45-degree inspection angle", + "shallow side-looking pass", + "close detail view with wide-angle lens", + "overview frame with the target centered", + ], + ) + add_category( + config_builder, + "defect_or_event", + [ + "no visible issue, normal baseline condition", + "small crack or seam separation", + "moderate staining or water pooling", + "missing roof shingles or damaged surface panels", + "debris scattered across the inspection area", + "vegetation growth obscuring part of the asset", + "erosion or washout near an edge", + "construction material staged in the wrong zone", + "storm damage with displaced objects", + 
"surface discoloration that may be benign", + ], + ) + add_category( + config_builder, + "severity", + [ + "none", + "minor and easy to miss", + "moderate and localized", + "severe and clearly visible", + ], + ) + add_category( + config_builder, + "occlusion", + [ + "clear unobstructed view", + "partially occluded by tree branches", + "partially occluded by shadows", + "partially occluded by temporary equipment", + "motion blur from drone movement", + ], + ) + add_category( + config_builder, + "weather_lighting", + [ + "bright midday sun", + "soft overcast light", + "golden hour light with long shadows", + "after-rain wet surfaces", + "hazy light with reduced contrast", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="drone_inspection_image", + prompt=DRONE_AERIAL_INSPECTION_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +DRONE_AERIAL_INSPECTION_PROMPT = """\ +Create a realistic low-altitude drone inspection image. + +Image requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Site type: {{ site_type }} +- Inspection target: {{ inspection_target }} +- Altitude: {{ altitude }} +- Camera angle: {{ camera_angle }} +- Defect or event: {{ defect_or_event }} +- Severity: {{ severity }} +- Occlusion: {{ occlusion }} +- Weather and lighting: {{ weather_lighting }} + +Render the image as if captured by a civilian inspection drone, not a satellite +or normal ground camera. 
Make the inspection target and requested defect, +event, baseline condition, or confounder visible enough for visual QA, +reviewer calibration, or VLM evaluation. Show realistic materials, shadows, +scale, surfaces, construction context, vegetation, drainage, roof texture, +panels, tracks, roads, or structural elements when requested. + +Render this as a clean raw drone camera frame. Do not include surveillance UI, +inspection report graphics, HUD elements, map overlays, crosshairs, targeting +reticles, bounding boxes, segmentation masks, heatmap colors, arrows, callouts, +measurement graphics, labels, timestamps, coordinates, real place names, +readable license plates, identifiable people, faces, watermarks, or any text +overlay. Do not frame it as a military, police, or sensitive-facility image. +Generate exactly one final drone inspection image for this row. Do not return +alternate versions, a grid, a pair of examples, before/after panels, or multiple +frames. Use the visual variation ID only as an internal diversity key; never +render it as text. 
+""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate synthetic drone aerial inspection imagery.") + parser.add_argument("--num-records", type=int, default=10, help="Number of drone inspection images to generate.") + parser.add_argument("--dataset-name", default="drone-aerial-inspection", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="16:9", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + dataset = results.load_dataset() + print(f"Generated {len(dataset)} drone aerial inspection image rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/image_generation/funny_pet_image_edits.py b/fern/assets/recipes/image_generation/funny_pet_image_edits.py new file mode 100644 index 000000000..539ffdcb6 --- 
/dev/null +++ b/fern/assets/recipes/image_generation/funny_pet_image_edits.py @@ -0,0 +1,404 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Funny Pet Image Editing Recipe + +Generate a base synthetic dog or cat image, then use image-to-image generation +to make the same pet scene funnier while preserving the pet's identity. + +Use this as a playful example of text-to-image followed by image-to-image: +generate a controlled reference image, edit it with additional sampled +conditions, then keep both images and the sampled metadata for visual QA, judge +development, creative review, demos, and model-capability exploration. + +Prerequisites: + - An image-generation provider key for a model that supports image-to-image + editing through the chat-completions route. The defaults use OpenRouter + and Gemini 3.1 Flash Image Preview, so set OPENROUTER_API_KEY before running. 
+ +Run: + uv run funny_pet_image_edits.py --num-records 10 +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "funny-pet-image-model" + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + """Build an image model config for text-to-image and image-to-image generation.""" + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def add_category(config_builder: dd.DataDesignerConfigBuilder, name: str, values: list[str]) -> None: + """Add a categorical sampler column.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values), + ) + ) + + +def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None: + """Add a unique row-level key that discourages duplicate image generations.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name="visual_variation_id", + sampler_type=dd.SamplerType.UUID, + params=dd.UUIDSamplerParams(prefix="pet-", short_form=True), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str = "4:3", + max_parallel_requests: int = 10, +) -> 
dd.DataDesignerConfigBuilder: + """Build a funny pet text-to-image plus image-to-image pipeline.""" + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + add_visual_variation_id(config_builder) + + add_category( + config_builder, + "pet_type", + [ + "dog", + "cat", + ], + ) + config_builder.add_column( + dd.SamplerColumnConfig( + name="pet_breed", + sampler_type=dd.SamplerType.SUBCATEGORY, + params=dd.SubcategorySamplerParams( + category="pet_type", + values={ + "dog": [ + "German shepherd", + "golden retriever", + "Pembroke Welsh corgi", + "French bulldog", + "Shih Tzu", + "beagle", + "border collie", + "mixed-breed terrier", + ], + "cat": [ + "orange tabby", + "black-and-white tuxedo cat", + "gray tabby", + "calico", + "Siamese cat", + "Maine Coon", + "British shorthair", + "domestic longhair", + ], + }, + ), + ) + ) + config_builder.add_column( + dd.SamplerColumnConfig( + name="pet_age", + sampler_type=dd.SamplerType.SUBCATEGORY, + params=dd.SubcategorySamplerParams( + category="pet_type", + values={ + "dog": [ + "puppy, 6 to 18 months old", + "young adult dog, 1 to 4 years old", + "adult dog, 4 to 7 years old", + "senior dog, 8 years or older", + ], + "cat": [ + "kitten, 4 to 12 months old", + "young adult cat, 1 to 4 years old", + "adult cat, 4 to 10 years old", + "senior cat, 11 years or older", + ], + }, + ), + ) + ) + add_category( + config_builder, + "base_activity", + [ + "sitting proudly at a small table", + "peeking over the edge of a sofa", + "standing on a kitchen chair", + "posing beside a cardboard box", + "lounging on a soft rug", + "looking directly at the camera with dramatic seriousness", + "balanced calmly beside a pile of toys", + ], + ) + add_category( + config_builder, + "base_setting", + [ + 
"sunny living room", + "cozy home office", + "tidy kitchen corner", + "soft studio backdrop", + "laundry room with folded towels", + "small apartment balcony with plants", + "quiet reading nook", + ], + ) + add_category( + config_builder, + "pet_expression", + [ + "deeply serious expression", + "wide-eyed confused expression", + "proud little smirk", + "sleepy but determined expression", + "mildly offended expression", + "curious head tilt", + ], + ) + add_category( + config_builder, + "base_photo_style", + [ + "natural phone photo with soft daylight", + "clean studio portrait with gentle shadows", + "warm editorial pet portrait", + "slightly low-angle comedic portrait", + "documentary-style candid photo", + ], + ) + add_category( + config_builder, + "comedy_edit_goal", + [ + "stage the pet as a tiny orchestra conductor for squeaky toys", + "stage the pet as a very serious chef inspecting a tiny bowl", + "stage the pet as a cardboard-spaceship pilot with abstract controls", + "stage the pet as a detective following a harmless trail of snack crumbs", + "stage the pet as a living-room sports champion with a tiny trophy", + "stage the pet as a tiny gardener supervising toy plants", + "stage the pet as a blanket-cape superhero in a cozy room", + "stage the pet as a toy stage performer under a tiny spotlight", + ], + ) + add_category( + config_builder, + "funny_prop", + [ + "tiny oversized glasses", + "miniature necktie", + "small chef hat", + "toy conductor baton", + "miniature trophy with no writing", + "blank tiny clipboard with no writing", + "cardboard rocket dashboard with colored circles only", + "toy magnifying glass", + "small paper crown", + "tiny blanket cape", + ], + ) + add_category( + config_builder, + "scene_escalation", + [ + "add a neatly arranged set of miniature props around the pet", + "add a playful spotlight and dramatic shadows", + "add a tiny stage setup made from household objects", + "add confetti-like paper shapes on the floor", + "add a 
pretend control panel made only of colored circles and blank buttons", + "add an audience of plush toys in the background", + "add a whimsical but tidy tabletop set", + "add toy vegetables, an empty bowl, and a tiny spoon", + "add squeaky toys arranged like an orchestra", + ], + ) + add_category( + config_builder, + "humor_style", + [ + "deadpan absurdity", + "cozy wholesome comedy", + "overly dramatic tiny-professional energy", + "gentle visual slapstick without distress", + "storybook-level silliness", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="base_pet_image", + prompt=BASE_PET_IMAGE_PROMPT, + model_alias=model_alias, + ) + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="funny_pet_image", + prompt=FUNNY_PET_EDIT_PROMPT, + model_alias=model_alias, + multi_modal_context=[dd.ImageContext(column_name="base_pet_image")], + ) + ) + + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +BASE_PET_IMAGE_PROMPT = """\ +Create a realistic synthetic pet photo. + +Image requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Pet type: {{ pet_type }} +- Pet breed: {{ pet_breed }} +- Pet age: {{ pet_age }} +- Base activity: {{ base_activity }} +- Base setting: {{ base_setting }} +- Pet expression: {{ pet_expression }} +- Photo style: {{ base_photo_style }} + +Show exactly one healthy, comfortable {{ pet_age }} {{ pet_breed }}. The pet +should be the clear subject, fully visible enough to edit later, and safely +posed in a harmless indoor or domestic setting. 
Use realistic fur, eyes, +proportions, lighting, shadows, and background details appropriate for the pet +type, breed, and age. Do not include text overlays, real brand logos, +watermarks, captions, speech bubbles, unsafe handling, costumes that restrict +movement, or distressed expressions. Generate exactly one final image for this +row. Use the visual variation ID only as an internal diversity key; never +render it as text. +""" + + +FUNNY_PET_EDIT_PROMPT = """\ +Edit the provided pet image to make the scene funnier while preserving the same pet. + +Edit requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Comedy edit goal: {{ comedy_edit_goal }} +- Funny prop: {{ funny_prop }} +- Scene escalation: {{ scene_escalation }} +- Humor style: {{ humor_style }} + +Preserve the same pet identity from the reference image: same species, fur +color, markings, face, body size, expression family, and core pose. Keep the +pet safe, comfortable, healthy, and not distressed. Add playful props, +background details, or scene context that make the image funnier, but keep the +result as one coherent photo-like image rather than a collage. + +Do not change the pet into a different animal, add extra pets, add humans, +add speech bubbles, add readable text, add letters, add numbers, add real brand +logos, add watermarks, show unsafe handling, show distress, or make the prop +appear tight, restrictive, or uncomfortable. If papers, signs, screens, labels, +chalkboards, books, control panels, or trophies appear, they must be blank or +use abstract colored shapes only. Generate exactly one final edited image for +this row. Use the visual variation ID only as an internal diversity key; never +render it as text. 
+""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate funny pet image edits.") + parser.add_argument("--num-records", type=int, default=10, help="Number of funny pet image rows to generate.") + parser.add_argument("--dataset-name", default="funny-pet-image-edits", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="4:3", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + dataset = results.load_dataset() + print(f"Generated {len(dataset)} funny pet image-edit rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/image_generation/humanoid_robot_scene_understanding.py b/fern/assets/recipes/image_generation/humanoid_robot_scene_understanding.py new file mode 100644 index 000000000..e293ce8f5 --- /dev/null 
+++ b/fern/assets/recipes/image_generation/humanoid_robot_scene_understanding.py @@ -0,0 +1,315 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Humanoid Robot Scene Understanding Image Generation Recipe + +Generate synthetic egocentric humanoid robot images with controlled variation +over indoor environment, robot viewpoint, task goal, object set, scene state, +safety condition, lighting, and adult human presence. + +Use the generated images for embodied-AI scene understanding, visual QA, +reviewer calibration, safety review, and robotics demos where the image should +look like a frame captured from the robot's own camera in a controlled setting. + +Prerequisites: + - An image-generation provider key for the selected model. The defaults use + OpenRouter, so set OPENROUTER_API_KEY before running. + +Run: + uv run humanoid_robot_scene_understanding.py --num-records 10 +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "humanoid-scene-model" + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def 
add_category(config_builder: dd.DataDesignerConfigBuilder, name: str, values: list[str]) -> None: + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values), + ) + ) + + +def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None: + """Add a unique row-level key that discourages duplicate image generations.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name="visual_variation_id", + sampler_type=dd.SamplerType.UUID, + params=dd.UUIDSamplerParams(prefix="humanoid-", short_form=True), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str = "16:9", + max_parallel_requests: int = 10, +) -> dd.DataDesignerConfigBuilder: + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + add_visual_variation_id(config_builder) + + add_category( + config_builder, + "environment", + [ + "teaching kitchen with counters, cabinets, and everyday objects", + "mock apartment living room arranged for assistive robotics", + "assisted living bedroom with bedside table and mobility aids", + "robotics lab workbench with tools and calibration objects", + "retail stockroom with shelves, totes, and handheld items", + "hospital supply room with carts, bins, and sealed supplies", + "office break room with appliances, tableware, and waste bins", + "laundry room with baskets, detergent, shelves, and folded towels", + "tool bench training area with bins, fasteners, and hand tools", + "grocery practice aisle with shelves, baskets, and fallen items", + ], + ) + add_category( + 
config_builder, + "robot_viewpoint", + [ + "head-mounted camera at standing adult height", + "chest-mounted camera with both robot hands barely visible at the bottom edge", + "slightly downward gaze toward a tabletop work surface", + "close manipulation view with one robot hand near the target object", + "wide room scan from a doorway before entering the scene", + "low crouched inspection angle looking under a table or cart", + ], + ) + add_category( + config_builder, + "task_goal", + [ + "locate the requested object before reaching", + "judge whether the path is safe to walk through", + "identify which objects are reachable from the current pose", + "verify that a cleanup task is complete", + "prepare a clear handoff area for an adult user", + "find the missing tool or supply item", + "inspect a spill or obstacle before moving closer", + "decide whether fragile items are too close to an edge", + ], + ) + add_category( + config_builder, + "object_set", + [ + "mug, kettle, sponge, dish towel, and cereal bowl", + "water glass, medication organizer, tissue box, and walking cane", + "pipette rack, beaker, nitrile gloves, and small screwdriver", + "barcode scanner, tote, tape dispenser, folded shirt, and box cutter", + "laundry basket, detergent bottle, folded towels, and loose sock", + "pliers, hex keys, small bolts, tape measure, and plastic bins", + "shopping basket, cereal boxes, soup cans, and fallen fruit", + "meal tray, sealed supplies, clipboard, and rolling cart", + ], + ) + add_category( + config_builder, + "scene_state", + [ + "organized and ready for the task", + "moderately cluttered but navigable", + "target object partly occluded by other items", + "target object moved to an unexpected location", + "container open with mixed contents visible", + "fragile item near the table edge", + "object stack unstable but still standing", + "task area partly blocked by a chair or cart", + ], + ) + add_category( + config_builder, + "safety_condition", + [ + "no visible 
hazard", + "small liquid spill on the floor", + "power cable crossing the walking path", + "sharp tool exposed on the work surface", + "hot appliance indicator light visible", + "glass object on the floor near the path", + "drawer left open at knee height", + "rolling cart partially blocking the doorway", + ], + ) + add_category( + config_builder, + "human_presence", + [ + "no person visible", + "adult worker's gloved hands visible at a safe distance", + "adult caregiver standing in the background with face turned away", + "adult shopper passing through the background, not identifiable", + "adult lab worker partially visible from shoulders down", + "adult office worker's arm visible near the handoff area", + ], + ) + add_category( + config_builder, + "lighting", + [ + "bright even lab lighting", + "warm apartment lighting", + "overcast window light", + "mixed overhead and task lighting", + "dim hallway light with localized task lamp", + "high-contrast backlighting from a nearby window", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="humanoid_scene_image", + prompt=HUMANOID_SCENE_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +HUMANOID_SCENE_PROMPT = """\ +Create a realistic egocentric humanoid robot scene-understanding image. + +The frame must look like it was captured from the humanoid robot's own camera +inside a controlled indoor environment. 
Show the robot's viewpoint clearly: +camera height, reachable workspace, path geometry, task-relevant objects, +obstacles, and safety condition should all be visible enough for visual QA or +embodied-AI scene understanding. If the viewpoint mentions robot hands, show at +most one or two simple robot hands at the image edge; do not make the robot the +main subject. + +Scene requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Environment: {{ environment }} +- Robot viewpoint: {{ robot_viewpoint }} +- Task goal: {{ task_goal }} +- Object set: {{ object_set }} +- Scene state: {{ scene_state }} +- Safety condition: {{ safety_condition }} +- Human presence: {{ human_presence }} +- Lighting: {{ lighting }} + +Make the requested task goal, object set, scene state, and safety condition +visually legible without adding labels or annotation graphics. Use realistic +materials, clutter, occlusion, reachability cues, shadows, and indoor scale. + +Do not include children, identifiable faces, readable personal names, real +company logos, surveillance UI, bounding boxes, arrows, captions, labels, +watermarks, subtitles, HUD overlays, or diagnostic text. Generate exactly one +final camera frame for this row. Do not return alternate versions, a grid, a +pair of examples, before/after panels, or multiple frames. Use the visual +variation ID only as an internal diversity key; never render it as text. 
+""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate synthetic humanoid robot scene-understanding images.") + parser.add_argument("--num-records", type=int, default=10, help="Number of humanoid scene images to generate.") + parser.add_argument("--dataset-name", default="humanoid-robot-scene-understanding", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="16:9", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + dataset = results.load_dataset() + print(f"Generated {len(dataset)} humanoid robot scene-understanding rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/image_generation/medical_extremity_xrays.py b/fern/assets/recipes/image_generation/medical_extremity_xrays.py new file mode 100644 index 
000000000..1e4faa076 --- /dev/null +++ b/fern/assets/recipes/image_generation/medical_extremity_xrays.py @@ -0,0 +1,382 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Synthetic Extremity X-ray Image Generation Recipe + +Generate synthetic extremity X-ray style images with controlled variation over +anatomical region, view, imaging context, technical quality, and musculoskeletal +findings. + +Medical disclaimer: + These generated images are synthetic and intended only for AI research, + education, data-pipeline prototyping, and evaluation workflows. They are not + real medical images and must not be used for diagnosis, treatment planning, + clinical decision-making, or as a substitute for real clinical validation. + +Prerequisites: + - An image-generation provider key for the selected model. The defaults use + OpenRouter, so set OPENROUTER_API_KEY before running. 
+ +Run: + uv run medical_extremity_xrays.py --num-records 5 +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "medical-image-model" + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + """Build a provider-agnostic image-generation model config.""" + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def add_category(config_builder: dd.DataDesignerConfigBuilder, name: str, values: list[str]) -> None: + """Add a categorical sampler column.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values), + ) + ) + + +def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None: + """Add a unique row-level key that discourages duplicate image generations.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name="visual_variation_id", + sampler_type=dd.SamplerType.UUID, + params=dd.UUIDSamplerParams(prefix="xray-", short_form=True), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str = "1:1", + max_parallel_requests: int = 10, +) -> dd.DataDesignerConfigBuilder: + 
"""Build a synthetic extremity X-ray image-generation pipeline.""" + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + add_visual_variation_id(config_builder) + + add_category( + config_builder, + "patient_age_group", + [ + "young adult", + "adult", + "middle-aged adult", + "older adult", + "geriatric adult", + ], + ) + + add_category( + config_builder, + "patient_sex", + [ + "female", + "male", + ], + ) + + add_category( + config_builder, + "body_habitus", + [ + "thin build", + "athletic build", + "average build", + "overweight build", + "obese build", + ], + ) + + add_category( + config_builder, + "anatomical_region", + [ + "right shoulder", + "left shoulder", + "right humerus", + "left humerus", + "right elbow", + "left elbow", + "right forearm with radius and ulna", + "left forearm with radius and ulna", + "right wrist", + "left wrist", + "right hand and fingers", + "left hand and fingers", + "right hip", + "left hip", + "right femur", + "left femur", + "right knee", + "left knee", + "right tibia and fibula", + "left tibia and fibula", + "right ankle", + "left ankle", + "right foot and toes", + "left foot and toes", + ], + ) + + add_category( + config_builder, + "equipment_type", + [ + "fixed radiography unit", + "portable X-ray machine", + "digital radiography system", + "computed radiography system", + ], + ) + + add_category( + config_builder, + "imaging_context", + [ + "emergency department acute trauma", + "emergency department fall injury", + "emergency department sports injury", + "orthopedic clinic routine follow-up", + "post-operative hardware check", + "pre-operative planning", + "urgent care pain evaluation", + ], + ) + + add_category( + config_builder, + "xray_view", + [ + "anteroposterior (AP)", + 
"lateral", + "oblique internal rotation", + "oblique external rotation", + "weight-bearing AP", + "stress view", + ], + ) + + add_category( + config_builder, + "exposure_quality", + [ + "underexposed with cortical margins poorly defined", + "optimal exposure with clear cortical and trabecular detail", + "overexposed with washed out bone detail", + "low kVp technique with high bone contrast", + "high kVp technique with better soft tissue visualization", + ], + ) + + add_category( + config_builder, + "positioning", + [ + "well-positioned true AP or lateral", + "slightly rotated", + "oblique positioning", + "splint or cast in place", + "traction device visible", + "suboptimal because the patient could not cooperate due to pain", + ], + ) + + add_category( + config_builder, + "primary_finding", + [ + "normal with no acute osseous abnormality", + "nondisplaced fracture through the imaged bone", + "displaced fracture through the imaged bone", + "comminuted fracture involving the imaged bone", + "stress fracture line in the imaged bone", + "joint dislocation or subluxation in the imaged region", + "degenerative osteoarthritis in the imaged joint", + "suspected osteomyelitis with focal cortical destruction", + "soft tissue swelling with no acute fracture identified", + ], + ) + + add_category( + config_builder, + "secondary_findings", + [ + "none", + "osteopenia", + "degenerative joint changes at adjacent joints", + "old healed fracture with callus formation", + "orthopedic plate and screws", + "intramedullary nail", + "joint effusion", + "soft tissue calcifications", + "vascular calcifications", + ], + ) + + add_category( + config_builder, + "image_quality", + [ + "excellent sharp cortical margins and clear trabecular pattern", + "good adequate visualization of all bony structures", + "fair with mild motion artifact", + "fair with mild noise or graininess", + "fair with cast or splint partially obscuring detail", + "limited portable technique with technical limitations", 
+ "limited by patient body habitus", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="extremity_xray", + prompt=EXTREMITY_XRAY_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +EXTREMITY_XRAY_PROMPT = """\ +Create a synthetic research-only grayscale X-ray style radiograph of the +{{ anatomical_region }}, {{ xray_view }} view. + +Patient and acquisition context: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Patient age group: {{ patient_age_group }} +- Patient sex: {{ patient_sex }} +- Body habitus: {{ body_habitus }} +- Equipment: {{ equipment_type }} +- Context: {{ imaging_context }} +- Technical quality: {{ exposure_quality }} +- Positioning: {{ positioning }} +- Image quality: {{ image_quality }} + +Findings to depict: +- Primary finding: {{ primary_finding }} +- Secondary findings: {{ secondary_findings }} + +Use a realistic educational radiograph style with visible bones, joints, cortex, +trabecular pattern, and soft-tissue silhouette. Include standard left/right +markers where appropriate. Make the image look synthetic but useful for AI +research and data-pipeline prototyping. Do not include real patient names, real +medical record numbers, hospital logos, or any real protected health information. +Generate exactly one final radiograph for this row. Do not return alternate +versions, a two-view panel, a grid, a before/after image, duplicated views, or +multiple image candidates. 
Use the visual variation ID only as an internal +diversity key for anatomy framing, rotation, exposure texture, and soft-tissue +background; never render it as text. Do not add diagnostic captions or +explanatory text overlays. +""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate synthetic extremity X-ray style images.") + parser.add_argument("--num-records", type=int, default=5, help="Number of synthetic X-ray images to generate.") + parser.add_argument("--dataset-name", default="synthetic-extremity-xrays", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="1:1", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + dataset = results.load_dataset() + print(f"Generated {len(dataset)} synthetic extremity X-ray rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + +if 
__name__ == "__main__": + main() diff --git a/fern/assets/recipes/image_generation/product_image_variations.py b/fern/assets/recipes/image_generation/product_image_variations.py new file mode 100644 index 000000000..65f96c8f2 --- /dev/null +++ b/fern/assets/recipes/image_generation/product_image_variations.py @@ -0,0 +1,452 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Product Image Variation Recipe + +Generate a base apparel-on-person catalog image, then create inclusive fashion +catalog variations with an image-to-image model through ImageContext. Use this pattern +for e-commerce apparel variants, fit and styling coverage, marketplace +thumbnails, lookbook imagery, and creative QA workflows. + +For real product images, replace the `base_product_image` generation column with +a seed dataset column containing your product image paths, URLs, or base64 data, +then point `ImageContext(column_name=...)` at that seed column. + +Prerequisites: + - An image-generation provider key for a model that supports image-to-image + editing through the chat-completions route. The defaults use OpenRouter + and Gemini 3.1 Flash Image Preview, so set OPENROUTER_API_KEY before running. 
+ +Run: + uv run product_image_variations.py --num-records 5 +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "product-image-model" + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + """Build an image model config for text-to-image and image-to-image generation.""" + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def add_category( + config_builder: dd.DataDesignerConfigBuilder, + name: str, + values: list[str], + weights: list[float] | None = None, +) -> None: + """Add a categorical sampler column.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values, weights=weights), + ) + ) + + +def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None: + """Add a unique row-level key that discourages duplicate image generations.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name="visual_variation_id", + sampler_type=dd.SamplerType.UUID, + params=dd.UUIDSamplerParams(prefix="apparel-", short_form=True), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str 
= "3:4", + max_parallel_requests: int = 10, +) -> dd.DataDesignerConfigBuilder: + """Build an apparel product image variation pipeline.""" + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + add_visual_variation_id(config_builder) + + add_category( + config_builder, + "apparel_item", + [ + "organic cotton crewneck t-shirt", + "lightweight denim jacket", + "water-resistant rain jacket", + "relaxed-fit hoodie", + "wide-leg linen trousers", + "ribbed knit cardigan", + "quilted puffer vest", + "stretch woven workwear shirt", + "ankle-length everyday dress", + "adaptive zip-front jacket", + ], + ) + + add_category( + config_builder, + "base_colorway", + [ + "matte black", + "warm white", + "sage green", + "deep navy", + "brushed silver", + "terracotta", + "soft lavender", + "charcoal gray", + "sunflower yellow", + "denim blue", + ], + ) + + add_category( + config_builder, + "base_view", + [ + "front-facing standing full-body catalog photo with one synthetic adult model", + "three-quarter standing full-body catalog photo with one synthetic adult model", + "side-angle standing full-body catalog photo with one synthetic adult model", + "walking-pose full-body catalog photo with one synthetic adult model", + ], + ) + + add_category( + config_builder, + "base_model_profile", + [ + "young adult Black model with an athletic build", + "middle-aged East Asian model with an average build", + "older Latine model with a petite build", + "young adult South Asian model with a plus-size build", + "middle-aged Middle Eastern model with a tall build", + "young adult Indigenous model with a broad-shouldered build", + "older White model with a curvy build", + "young adult multiracial model with a slender build", + ], + ) + + add_category( + 
config_builder, + "variation_goal", + [ + "inclusive e-commerce catalog image on a clean white background", + "lifestyle lookbook image in an everyday urban setting", + "fit-guide image showing garment drape and silhouette", + "seasonal campaign image for cool-weather layering", + "adaptive-fashion catalog image emphasizing ease of wear", + "single-model adult age-inclusive catalog image", + "social media campaign image with bold colored backdrop", + "editorial fashion image with soft premium lighting", + ], + ) + + add_category( + config_builder, + "edit_scene_delta", + [ + "move from a neutral studio catalog reference into an outdoor urban lifestyle scene", + "move from a neutral studio catalog reference into a warm home entryway lookbook scene", + "move from a neutral studio catalog reference into a bold color-block campaign set", + "move from a neutral studio catalog reference into a smart-casual workplace corridor scene", + "move from a neutral studio catalog reference into a weekend park lookbook scene", + "move from a neutral studio catalog reference into a premium editorial studio set with draped fabric", + "move from a neutral studio catalog reference into a clean fit-guide scene with a new full-body pose", + "move from a neutral studio catalog reference into a seasonal layering scene with visible outerwear styling", + ], + ) + + add_category( + config_builder, + "model_age_group", + [ + "young adult model", + "middle-aged adult model", + "older adult model", + "senior adult model", + ], + ) + + add_category( + config_builder, + "model_ethnicity", + [ + "Black or African diaspora model", + "East Asian model", + "South Asian model", + "Latine model", + "Middle Eastern or North African model", + "Indigenous model", + "Pacific Islander model", + "White or European model", + "multiracial model", + ], + ) + + add_category( + config_builder, + "body_type", + [ + "petite build", + "tall build", + "plus-size build", + "athletic build", + "broad-shouldered build", 
+ "curvy build", + "slender build", + "average build", + ], + ) + + add_category( + config_builder, + "accessibility_context", + [ + "standing model without visible mobility aids", + "model with no specific accessibility cue", + "standing model in a relaxed catalog pose", + "model walking naturally without visible mobility aids", + "model seated on a simple studio stool", + "model leaning lightly against a studio block", + "model holding a small neutral accessory", + "seated model using a wheelchair", + "model with a visible prosthetic limb", + "model using forearm crutches", + ], + weights=[1.6, 1.6, 1.4, 1.4, 0.9, 0.9, 0.9, 0.2, 0.2, 0.2], + ) + + add_category( + config_builder, + "styling_context", + [ + "pure white seamless catalog background", + "soft neutral studio backdrop", + "outdoor morning city street", + "modern home entryway", + "adult campus casual setting", + "workplace smart-casual setting", + "weekend park setting", + "minimal geometric studio set", + ], + ) + + add_category( + config_builder, + "composition", + [ + "front-facing full-body catalog pose with the entire person visible", + "three-quarter full-body pose with the entire person visible", + "single seated full-body pose showing garment fit with the whole body visible", + "single walking full-body pose with natural garment movement", + "side-angle full-body pose with clear garment silhouette", + ], + ) + + add_category( + config_builder, + "lighting", + [ + "softbox studio lighting", + "natural window light", + "bright catalog lighting", + "warm golden-hour lighting", + "soft overcast outdoor light", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="base_product_image", + prompt=BASE_PRODUCT_IMAGE_PROMPT, + model_alias=model_alias, + ) + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="product_variant_image", + prompt=PRODUCT_VARIATION_PROMPT, + model_alias=model_alias, + multi_modal_context=[dd.ImageContext(column_name="base_product_image")], + ) + ) 
+ + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +BASE_PRODUCT_IMAGE_PROMPT = """\ +Create a synthetic apparel catalog reference photo of a person wearing a {{ base_colorway }} {{ apparel_item }}. + +Image requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- View: {{ base_view }} +- Base model profile: {{ base_model_profile }} +- Background: clean neutral studio background +- Lighting: soft catalog lighting +- Use exactly one synthetic adult model in neutral catalog styling. +- Output must use vertical portrait framing, with a 3:4 portrait composition that is taller than it is wide. +- The garment should be centered, worn naturally, fully visible, and isolated enough to edit later. +- The frame must be a full-body image: show the model from head to toe with feet visible and comfortable margins around the body. +- Show fabric texture, seams, silhouette, cuffs, closures, pockets, fit, drape, and other garment details when relevant. +- Keep the model presentation neutral, fully clothed, non-sexualized, and commercially appropriate. +- Do not include extra people, duplicate bodies, mannequins, cropped bodies, close-up crops, landscape frames, square frames, real brand logos, real trademarks, watermarks, price tags, text overlays, celebrity likenesses, or real people. +- Use a plausible invented garment design with consistent shape, color, fit, and material details. +- Generate exactly one final image for this row. Do not return alternate versions, a grid, a pair of examples, a before/after image, or multiple panels. 
Use the visual variation ID only as an internal diversity key; never render it as text. +""" + + +PRODUCT_VARIATION_PROMPT = """\ +Edit the provided apparel-on-person catalog image into a new inclusive commercial fashion image. + +Variation requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Variation goal: {{ variation_goal }} +- Required edit delta: {{ edit_scene_delta }} +- Model age group: {{ model_age_group }} +- Model ethnicity: {{ model_ethnicity }} +- Body type: {{ body_type }} +- Accessibility context: {{ accessibility_context }} +- Styling context: {{ styling_context }} +- Composition: {{ composition }} +- Lighting: {{ lighting }} + +Use the provided image as a garment reference, not as a full-shot template. +Preserve the garment's core identity from the reference image: same apparel +item, same primary colorway, same fabric cues, same silhouette, same fit +behavior, and same distinctive design details. Do not preserve the original +person, original face, original hair, original stance, original camera angle, +or original plain studio background unless that exact choice is requested by +the sampled controls. + +Create a visibly different commercial variant. The final image should make at +least three obvious changes from the reference photo: a different synthetic +adult model, a different full-body pose or body orientation, a different +setting or background, and a different lighting or campaign style. Change the +person wearing the garment to match the requested synthetic adult model +background, age group, body type, and accessibility context. Represent the +adult model respectfully and without stereotypes. Every generated person must +clearly be 18 or older. + +The edited output must show exactly one person. 
The final image must be a +vertical 3:4 portrait full-body catalog image that is taller than it is wide: +show the full head-to-toe body with feet visible, or the full seated body and +full mobility aid when the accessibility context calls for one. Do not crop at +the face, waist, knees, ankles, hands, or garment hem. Do not create landscape +frames, square frames, group shots, mirrored duplicates, before/after +composites, multiple models, mannequins, or background bystanders. + +Follow the required edit delta when changing the surrounding scene, styling, +pose, background, and lighting. Keep the result realistic and commercially +usable. Do not add real brand logos, real trademarks, watermarks, price tags, +text overlays, sexualized styling, swimwear, underwear, lingerie, sheer +clothing, or revealing poses. +Generate exactly one final edited image for this row. Do not return alternate +versions, a grid, a pair of examples, a before/after image, or multiple panels. +Use the visual variation ID only as an internal diversity key; never render it +as text. 
+""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate product image variations with image-to-image editing.") + parser.add_argument("--num-records", type=int, default=5, help="Number of product variation rows to generate.") + parser.add_argument("--dataset-name", default="product-image-variations", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="3:4", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + dataset = results.load_dataset() + print(f"Generated {len(dataset)} product variation rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/image_generation/rich_document_images.py b/fern/assets/recipes/image_generation/rich_document_images.py new file mode 100644 index 000000000..38f9c3f39 --- 
/dev/null +++ b/fern/assets/recipes/image_generation/rich_document_images.py @@ -0,0 +1,446 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# "pandas", +# "pyarrow", +# ] +# /// +"""Rich Document Image Generation Recipe + +Generate synthetic business-document page images with controlled variation. +Each generated row pairs an image with the metadata that produced it, making +the output useful as seed data for visual QA, OCR robustness, multimodal +judging, and document-understanding experiments. + +Prerequisites: + - An image-generation provider key for the selected model. The defaults use + OpenRouter, so set OPENROUTER_API_KEY before running. + +Run: + # Generate 5 rich document images with the default OpenRouter model. + uv run rich_document_images.py --num-records 5 + + # Export a VQA-ready seed parquet with base64 PNGs plus orientation fields. + uv run rich_document_images.py --num-records 25 --export-seed rich_document_seed.parquet + + # Use a different provider or image model. 
+ uv run rich_document_images.py --model-provider openrouter --model-id google/gemini-3.1-flash-image-preview +""" + +from __future__ import annotations + +import argparse +import base64 +from collections.abc import Sequence +from pathlib import Path + +import pandas as pd + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "document-generation-model" + +SEED_METADATA_COLUMNS = [ + "document_type", + "primary_visual", + "secondary_visual", + "layout_style", + "document_condition", +] + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + """Build a provider-agnostic image-generation model config.""" + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def add_category( + config_builder: dd.DataDesignerConfigBuilder, + name: str, + values: list[str], + weights: list[float] | None = None, +) -> None: + """Add a categorical sampler column.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values, weights=weights), + ) + ) + + +def add_visual_variation_id(config_builder: dd.DataDesignerConfigBuilder) -> None: + """Add a unique row-level key that discourages duplicate image generations.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name="visual_variation_id", + sampler_type=dd.SamplerType.UUID, + 
params=dd.UUIDSamplerParams(prefix="doc-", short_form=True), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str = "2:3", + max_parallel_requests: int = 10, +) -> dd.DataDesignerConfigBuilder: + """Build a rich document image-generation pipeline.""" + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = dd.DataDesignerConfigBuilder(model_configs=model_configs) + add_visual_variation_id(config_builder) + + add_category( + config_builder, + "document_type", + [ + "quarterly business review", + "market research brief", + "operations dashboard export", + "clinical trial status report", + "sustainability impact report", + "financial variance memo", + "customer support incident review", + "supply chain risk assessment", + "product launch readiness plan", + "employee engagement summary", + ], + weights=[0.12, 0.10, 0.14, 0.08, 0.08, 0.12, 0.12, 0.10, 0.12, 0.12], + ) + + add_category( + config_builder, + "organization_name", + [ + "Aster Analytics", + "Blue Ridge Health", + "CedarWorks Manufacturing", + "DeltaGrid Energy", + "Evergreen Mobility", + "Harborlight Retail", + "Northstar Robotics", + "Redwood BioSystems", + "Summit Cloud Services", + "Valley Forge Logistics", + ], + ) + + add_category( + config_builder, + "document_owner", + [ + "Maya Chen", + "Jonas Patel", + "Elena Garcia", + "Noah Williams", + "Amara Okafor", + "Theo Martin", + "Priya Raman", + "Sofia Rossi", + "Lena Fischer", + "Caleb Brooks", + ], + ) + + add_category( + config_builder, + "owner_role", + [ + "VP Operations", + "Finance Director", + "Clinical Program Manager", + "Customer Success Lead", + "Risk Officer", + "Product Launch Owner", + "People Analytics 
Partner", + ], + ) + + add_category( + config_builder, + "audience", + [ + "executive leadership", + "finance review committee", + "field operations managers", + "clinical program leads", + "board audit committee", + "customer success directors", + ], + ) + + add_category( + config_builder, + "content_theme", + [ + "quarterly revenue performance and forecast variance", + "regional customer adoption and churn risk", + "service-level agreement compliance and incident aging", + "inventory throughput, backorders, and supplier delays", + "trial enrollment, site activation, and adverse event counts", + "energy consumption, emissions, and sustainability targets", + "hiring funnel conversion, offer acceptance, and attrition", + "product launch milestones, owners, and readiness status", + ], + ) + + add_category( + config_builder, + "primary_visual", + [ + "clustered bar chart comparing three regions across four quarters", + "line chart with two series, annotated inflection points, and a target band", + "stacked area chart showing category mix over six months", + "waterfall chart showing contributors to budget variance", + "scatter plot with labeled outliers and a trend line", + "Gantt-style timeline with milestones and owner initials", + "heatmap matrix with risk severity by team and region", + "donut chart with callout labels and percentages", + ], + ) + + add_category( + config_builder, + "secondary_visual", + [ + "dense financial table with subtotals and variance arrows", + "KPI card row with current value, target, delta, and traffic-light status", + "two-column risk register with owner, due date, and mitigation note", + "small process diagram with arrows between four labeled stages", + "ranked list table with sparklines in the final column", + "compact map inset with region labels and numeric badges", + "executive callout box with three bullet conclusions", + "signature block plus approval checklist", + ], + ) + + add_category( + config_builder, + "layout_style", + [ + 
"clean consulting report page with narrow margins and section dividers", + "dashboard export with a top filter bar and grid of panels", + "formal memo with letterhead, dense paragraphs, and one embedded chart", + "board-pack page with title ribbon, footnotes, and small-print source notes", + "compliance form with checkboxes, tables, and stamped approval", + "research brief with abstract, sidebar definitions, and figure captions", + "operations one-pager with color-coded status chips and action table", + ], + ) + + add_category( + config_builder, + "document_condition", + [ + "pristine exported PDF screenshot", + "high-resolution office scanner output", + "faded photocopy with mild paper texture", + "creased printout with a clipped corner", + "low-contrast scan with light shadow near the binding edge", + ], + ) + + add_category( + config_builder, + "annotation_layer", + [ + "no manual annotations", + "yellow highlights over two key numbers", + "red pen circle around one chart outlier", + "blue sticky note partially covering the lower right table", + "handwritten margin note asking for follow-up", + "rubber stamp reading DRAFT across the header", + ], + ) + + add_category( + config_builder, + "numeric_context", + [ + "include values in thousands with one decimal place", + "include percentages, basis-point deltas, and small footnotes", + "include dates across the next six months", + "include currency values, totals, and year-over-year deltas", + "include counts by region plus a total row", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="document_image", + prompt=RICH_DOCUMENT_IMAGE_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + 
return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +def export_seed_parquet(results: DatasetCreationResults, output_path: Path) -> None: + """Export generated images as base64 PNG seed rows for VLM pipelines.""" + dataset = results.load_dataset() + base_path = results.artifact_storage.base_dataset_path + rows: list[dict[str, str]] = [] + + for _, row in dataset.iterrows(): + image_ref = _first_image_ref(row["document_image"]) + image_path = base_path / image_ref + output_row = { + "png_base64": base64.b64encode(image_path.read_bytes()).decode("utf-8"), + } + output_row.update({column: row[column] for column in SEED_METADATA_COLUMNS}) + rows.append(output_row) + + output_path.parent.mkdir(parents=True, exist_ok=True) + pd.DataFrame(rows).to_parquet(output_path, index=False) + + +def _first_image_ref(value: object) -> str: + if isinstance(value, str): + return value + if isinstance(value, Sequence) and value: + first = value[0] + if isinstance(first, str): + return first + raise ValueError(f"Expected document_image to be a string path or non-empty sequence, got {type(value)!r}") + + +RICH_DOCUMENT_IMAGE_PROMPT = """\ +Create a realistic single-page business document image with rich visual information. 
+ +Document requirements: +- Visual variation ID, for internal diversity only: {{ visual_variation_id }} +- Document type: {{ document_type }} +- Organization: {{ organization_name }} +- Document owner: {{ document_owner }}, {{ owner_role }} +- Intended audience: {{ audience }} +- Theme: {{ content_theme }} +- Layout style: {{ layout_style }} +- Physical/rendering condition: {{ document_condition }} +- Annotation layer: {{ annotation_layer }} +- Numeric style: {{ numeric_context }} + +Required visual content: +- Primary visual: {{ primary_visual }} +- Secondary visual: {{ secondary_visual }} +- At least one readable table with row and column labels +- At least one chart, timeline, heatmap, diagram, or KPI-card cluster +- A clear title, date, organization name, document owner, section headings, and small source note +- Enough readable text to ask visual QA questions about exact values, trends, labels, owners, dates, and relationships + +Make the page visually dense but professionally designed. Use realistic fonts, +alignment, legends, axis labels, table borders, captions, and spacing. The text +and numbers should be legible. Avoid blank areas, generic placeholder blocks, +or lorem ipsum. Generate exactly one final document page for this row. Do not +return alternate versions, a grid, a pair of examples, before/after panels, or +multiple pages. Use the visual variation ID only as an internal diversity key; +never render it as text. Do not include real company logos or real personal +data. 
+""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate rich synthetic business-document images.") + parser.add_argument("--num-records", type=int, default=5, help="Number of document images to generate.") + parser.add_argument("--dataset-name", default="rich-document-images", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="2:3", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + parser.add_argument( + "--export-seed", + type=Path, + default=None, + help="Optional parquet path for a VQA-ready seed with base64 PNGs and orientation fields.", + ) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + + dataset = results.load_dataset() + print(f"Generated {len(dataset)} rich document image rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + if args.export_seed is not None: + export_seed_parquet(results, args.export_seed) + 
print(f"Exported VQA seed parquet: {args.export_seed}") + + +if __name__ == "__main__": + main() diff --git a/fern/assets/recipes/image_generation/traffic_scenarios.py b/fern/assets/recipes/image_generation/traffic_scenarios.py new file mode 100644 index 000000000..046bb1b73 --- /dev/null +++ b/fern/assets/recipes/image_generation/traffic_scenarios.py @@ -0,0 +1,433 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "data-designer", +# ] +# /// +"""Autonomous Vehicle Traffic Scenario Image Generation Recipe + +Generate synthetic autonomous-vehicle ego-camera images with controlled +variation over region, road type, weather, time of day, traffic density, +surface condition, traffic controls, and long-tail scenario elements. Use the +generated images for perception review sets, visual QA, or +simulator-validation prompts. + +Synthetic images are not a replacement for real sensor logs, simulator runs, or +safety validation. They are useful for rapidly creating controlled visual +examples around rare or hazardous conditions. + +Prerequisites: + - An image-generation provider key for the selected model. The defaults use + OpenRouter, so set OPENROUTER_API_KEY before running. 
+ +Run: + uv run traffic_scenarios.py --num-records 10 +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import data_designer.config as dd +from data_designer.interface import DataDesigner, DatasetCreationResults + +DEFAULT_MODEL_PROVIDER = "openrouter" +DEFAULT_MODEL_ID = "google/gemini-3.1-flash-image-preview" +DEFAULT_MODEL_ALIAS = "traffic-scene-model" + + +def build_model_configs( + *, + model_provider: str, + model_id: str, + model_alias: str, + image_size: str, + aspect_ratio: str, + max_parallel_requests: int, +) -> list[dd.ModelConfig]: + """Build a provider-agnostic image-generation model config.""" + return [ + dd.ModelConfig( + alias=model_alias, + model=model_id, + provider=model_provider, + inference_parameters=dd.ImageInferenceParams( + extra_body={ + "n": 1, + "generationConfig": { + "imageConfig": { + "aspectRatio": aspect_ratio, + "imageSize": image_size, + } + }, + }, + max_parallel_requests=max_parallel_requests, + ), + skip_health_check=True, + ) + ] + + +def add_category(config_builder: dd.DataDesignerConfigBuilder, name: str, values: list[str]) -> None: + """Add a categorical sampler column.""" + config_builder.add_column( + dd.SamplerColumnConfig( + name=name, + sampler_type=dd.SamplerType.CATEGORY, + params=dd.CategorySamplerParams(values=values), + ) + ) + + +def build_config( + *, + model_provider: str = DEFAULT_MODEL_PROVIDER, + model_id: str = DEFAULT_MODEL_ID, + model_alias: str = DEFAULT_MODEL_ALIAS, + image_size: str = "1024", + aspect_ratio: str = "16:9", + max_parallel_requests: int = 10, +) -> dd.DataDesignerConfigBuilder: + """Build an autonomous-vehicle ego-camera image-generation pipeline.""" + model_configs = build_model_configs( + model_provider=model_provider, + model_id=model_id, + model_alias=model_alias, + image_size=image_size, + aspect_ratio=aspect_ratio, + max_parallel_requests=max_parallel_requests, + ) + config_builder = 
dd.DataDesignerConfigBuilder(model_configs=model_configs) + + add_category( + config_builder, + "geographic_region", + [ + "US - dense urban (NYC-style)", + "US - sprawling suburban (Los Angeles-style)", + "US - rural Midwest", + "Europe - narrow streets (Italian/French town)", + "Europe - orderly infrastructure (German autobahn)", + "Asia - mixed traffic (India/Thailand)", + "Asia - modern cityscape (Singapore/Tokyo)", + ], + ) + + add_category( + config_builder, + "road_type", + [ + "urban city street with tall buildings", + "urban street with mixed retail/residential", + "suburban residential street with trees", + "suburban commercial strip with parking lots", + "highway - 3 lanes each direction", + "highway - 5 lanes each direction with HOV", + "rural two-lane country road", + "rural highway with sparse markings", + "mountain road with curves and guardrails", + "coastal road with scenic views", + "bridge - suspension or arch style", + "bridge - concrete overpass", + "tunnel - well-lit with ceiling lights", + "tunnel - dim lighting", + "parking lot - shopping center", + "parking garage - multi-level", + "intersection - 4-way with traffic lights", + "intersection - complex 6-way", + "roundabout - single lane", + "roundabout - multi-lane", + "construction zone with detour", + "school zone with crossing", + ], + ) + + add_category( + config_builder, + "weather", + [ + "clear sunny day", + "partly cloudy", + "overcast with gray skies", + "light rain - misty windshield", + "moderate rain - active wipers", + "heavy rain - reduced visibility under 100ft", + "light fog - moderate visibility", + "dense fog - visibility under 50ft", + "light snow - flurries", + "moderate snow - accumulating on road", + "heavy snow - whiteout conditions", + "sleet/freezing rain", + "dust storm - desert conditions", + "high winds - debris visible", + ], + ) + + add_category( + config_builder, + "time_of_day", + [ + "dawn - pre-sunrise twilight", + "early morning - golden hour light", + 
"mid-morning - bright sun, long shadows", + "midday - overhead sun, minimal shadows", + "afternoon - sun starting to lower", + "late afternoon - golden hour", + "dusk - post-sunset twilight", + "night - well-lit with streetlights", + "night - moderately lit urban", + "night - poorly lit rural", + "night - headlights only, no street lighting", + ], + ) + + add_category( + config_builder, + "traffic_density", + [ + "empty - no other vehicles visible", + "sparse - 1-2 vehicles in distance", + "light - 3-5 vehicles visible", + "moderate - steady flow of traffic", + "heavy - congested, slow-moving", + "stop-and-go - bumper-to-bumper", + ], + ) + + add_category( + config_builder, + "vehicle_mix", + [ + "sedans and compact cars", + "mix of cars and SUVs", + "includes large trucks/semi-trailers", + "includes buses", + "includes motorcycles and scooters", + "includes bicycles and e-bikes", + "includes delivery vans/box trucks", + "mixed vehicle types - diverse traffic", + ], + ) + + add_category( + config_builder, + "scenario_element", + [ + "pedestrian crossing at marked crosswalk", + "pedestrian jaywalking mid-block", + "pedestrian with stroller/wheelchair", + "group of pedestrians crossing", + "child chasing ball toward street", + "jogger/runner on shoulder", + "pedestrian wearing dark clothing at night", + "pedestrian with umbrella obscuring face in rain", + "elderly pedestrian crossing slowly with walker", + "pedestrian distracted by phone while crossing", + "pedestrians exiting parked bus on roadside", + "crowd spilling onto road from sidewalk event", + "cyclist in dedicated bike lane", + "cyclist merging into traffic lane", + "cyclist making left turn", + "cyclist riding against traffic on wrong side", + "e-scooter rider weaving between cars", + "e-scooter rider on sidewalk entering crosswalk", + "group of cyclists in paceline on shoulder", + "cyclist with cargo trailer taking full lane", + "motorcycle lane splitting", + "motorcycle filtering through stopped 
traffic", + "motorcycle approaching from blind spot", + "school bus with stop sign extended and flashing", + "ambulance approaching with lights and sirens", + "police vehicle with lights activated", + "fire truck in oncoming lane", + "emergency vehicle approaching from behind in mirror", + "tow truck loading vehicle on roadside", + "construction zone - workers present with cones", + "construction zone - lane closure with signs", + "road crew filling potholes with equipment in lane", + "utility workers with cherry picker blocking lane", + "temporary steel plates covering road excavation", + "stopped vehicle - hazard lights on shoulder", + "vehicle broken down in lane", + "disabled vehicle in lane with warning triangle", + "vehicle stalled in intersection", + "vehicle with open hood - person inspecting engine", + "vehicle suddenly braking ahead", + "vehicle making unexpected lane change without signal", + "vehicle backing out of parking spot", + "vehicle running red light from cross street", + "vehicle driving wrong way on one-way street", + "vehicle making illegal U-turn", + "vehicle swerving to avoid pothole", + "vehicle drifting out of lane (distracted driver)", + "vehicle cutting in from merging lane aggressively", + "slow-moving vehicle (farm equipment/golf cart) on road", + "delivery truck double-parked with flashers", + "garbage truck with crew working", + "parked car door opening into traffic", + "semi-truck jackknifed across lanes", + "wide-load vehicle with escort car", + "ice cream truck stopped with children nearby", + "ride-share vehicle stopped abruptly for pickup", + "moving truck partially blocking lane while loading", + "food truck parked on street with customer queue", + "animal (deer) on roadside", + "animal (dog) loose on road", + "flock of birds on road surface", + "animal (coyote/fox) darting across road", + "fallen tree branch partially blocking lane", + "debris/cargo on road surface", + "large pothole in driving lane", + "standing 
water/flooded section of road", + "oil spill or fluid on road surface", + "tire tread/retread debris on highway", + "mattress or furniture fallen from truck on road", + "manhole cover missing or displaced", + "traffic cone or barrel knocked into lane", + "sun glare directly ahead through windshield", + "headlight glare from oncoming vehicle at night", + "spray/mist from vehicle ahead on wet road", + "shadow from overpass creating sudden darkness", + "smoke from nearby fire drifting across road", + "reflection of wet road creating mirror effect", + "traffic light malfunctioning - flashing red", + "obscured traffic sign by overgrown vegetation", + "contradictory road signs at intersection", + "pedestrian signal countdown with people still crossing", + "railroad crossing with gates descending and lights flashing", + "toll booth approach with lanes merging", + ], + ) + + add_category( + config_builder, + "road_surface", + [ + "dry asphalt - good condition", + "dry asphalt - faded lane markings", + "wet reflective surface", + "wet with puddles", + "icy patches visible", + "black ice conditions", + "snow-covered - lane markings obscured", + "gravel surface", + "unpaved dirt road", + "potholes and road damage visible", + "recent patching - uneven surface", + "construction - temporary markings", + ], + ) + + add_category( + config_builder, + "traffic_control", + [ + "traffic light - green", + "traffic light - yellow/amber", + "traffic light - red", + "stop sign clearly visible", + "yield sign", + "speed limit sign - 25 mph", + "speed limit sign - 55 mph", + "no traffic control - uncontrolled intersection", + "construction signage and cones", + "temporary traffic lights", + ], + ) + + config_builder.add_column( + dd.ImageColumnConfig( + name="traffic_scene", + prompt=TRAFFIC_SCENE_PROMPT, + model_alias=model_alias, + ) + ) + + return config_builder + + +def create_dataset( + config_builder: dd.DataDesignerConfigBuilder, + *, + num_records: int, + dataset_name: str, + 
artifact_path: Path | str | None = None, +) -> DatasetCreationResults: + data_designer = DataDesigner(artifact_path=artifact_path) + data_designer.validate(config_builder) + return data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name) + + +TRAFFIC_SCENE_PROMPT = """\ +Create a photorealistic autonomous-vehicle ego-camera perception scene. + +The image must look like it was captured by a camera mounted on the self-driving +ego vehicle, not by a roadside camera, drone, or cinematic photographer. Keep +the viewpoint physically plausible for an AV sensor. When appropriate, show a +subtle hood edge, windshield edge, or bumper edge, but do not show a full car +interior or dashboard UI. + +Scene requirements: +- Geographic region: {{ geographic_region }} +- Road type: {{ road_type }} +- Weather: {{ weather }} +- Time of day: {{ time_of_day }} +- Traffic density: {{ traffic_density }} +- Vehicle mix: {{ vehicle_mix }} +- Key scenario element: {{ scenario_element }} +- Road surface: {{ road_surface }} +- Traffic control: {{ traffic_control }} + +The scene should clearly show road geometry, lane markings, traffic signs, +traffic control devices, surrounding vehicles, vulnerable road users when +requested, and the key scenario element from the ego vehicle's camera. Preserve +regional driving characteristics such as road width, side of road, sign style, +and lane markings. Use realistic lighting, lens geometry, motion perspective, +weather effects, and visibility. Generate exactly one final ego-camera image +for this row. Do not return alternate versions, a grid, a pair of examples, +before/after panels, or multiple camera frames. Do not include text overlays, +labels, watermarks, dashcam timestamps, bounding boxes, sensor UI, or navigation +UI. 
+""" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate synthetic autonomous-vehicle traffic scenes.") + parser.add_argument("--num-records", type=int, default=10, help="Number of traffic scenes to generate.") + parser.add_argument("--dataset-name", default="synthetic-traffic-scenarios", help="Output dataset name.") + parser.add_argument("--artifact-path", type=Path, default=None, help="Optional Data Designer artifact directory.") + parser.add_argument("--model-provider", default=DEFAULT_MODEL_PROVIDER, help="Image model provider name.") + parser.add_argument("--model-id", default=DEFAULT_MODEL_ID, help="Provider model ID.") + parser.add_argument("--model-alias", default=DEFAULT_MODEL_ALIAS, help="Alias used by image columns.") + parser.add_argument("--image-size", default="1024", help="Provider-specific image size value.") + parser.add_argument("--aspect-ratio", default="16:9", help="Provider-specific aspect ratio value.") + parser.add_argument("--max-parallel-requests", type=int, default=10, help="Maximum parallel image requests.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + config_builder = build_config( + model_provider=args.model_provider, + model_id=args.model_id, + model_alias=args.model_alias, + image_size=args.image_size, + aspect_ratio=args.aspect_ratio, + max_parallel_requests=args.max_parallel_requests, + ) + results = create_dataset( + config_builder, + num_records=args.num_records, + dataset_name=args.dataset_name, + artifact_path=args.artifact_path, + ) + dataset = results.load_dataset() + print(f"Generated {len(dataset)} synthetic traffic-scene rows.") + print(f"Dataset artifacts: {results.artifact_storage.base_dataset_path}") + + +if __name__ == "__main__": + main() diff --git a/fern/components/ImageExample.tsx b/fern/components/ImageExample.tsx new file mode 100644 index 000000000..3f6209405 --- /dev/null +++ b/fern/components/ImageExample.tsx @@ -0,0 +1,316 @@ 
/**
 * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * ImageExample - Image-first figure for generated image examples.
 *
 * Designed for dev notes where an example image should stay visually dominant
 * while sampler controls remain scannable beside or below it.
 *
 * Usage in MDX:
 *   import { ImageExample } from "@/components/ImageExample";
 *
 *   <ImageExample
 *     title="Example title"
 *     src="/assets/examples/01.jpg"
 *     alt="Short description of the image"
 *     controls={[["key", "value"]]}
 *   />
 */

/**
 * Stylesheet for the component, kept as a static string (no interpolation).
 *
 * The lightbox is CSS-only: `.image-example__lightbox` is hidden by default
 * and shown through the `:target` pseudo-class when the URL fragment matches
 * its id.
 */
const IMAGE_EXAMPLE_CSS = `
.image-example {
  margin: 1.5rem 0 2.25rem;
  padding: 0.75rem;
  border: 1px solid var(--grayscale-a5, rgba(128, 128, 128, 0.18));
  border-radius: 8px;
  background: var(--grayscale-1, rgba(128, 128, 128, 0.025));
}
.image-example__figure {
  margin: 0 !important;
  padding: 0;
}
.image-example__image-link {
  display: block;
  margin: 0 auto !important;
  padding: 0;
  border-radius: 8px;
  outline-offset: 3px;
  cursor: zoom-in;
}
.image-example__image-link:hover .image-example__image {
  border-color: var(--accent, #76b900);
  box-shadow: 0 0 0 2px rgba(118, 185, 0, 0.24);
}
.image-example__image {
  display: block;
  width: 100%;
  height: auto;
  margin: 0 !important;
  border-radius: 8px;
  border: 1px solid var(--grayscale-a5, rgba(128, 128, 128, 0.18));
  background: var(--grayscale-2, rgba(128, 128, 128, 0.04));
}
.image-example__caption {
  margin-top: 0.55rem;
  padding-bottom: 0.6rem;
  border-bottom: 1px solid var(--grayscale-a4, rgba(128, 128, 128, 0.14));
  font-size: 0.92rem;
  line-height: 1.35;
  font-weight: 650;
}
.image-example__control-groups {
  display: grid;
  gap: 0.55rem;
  margin-top: 0.6rem;
}
.image-example__control-groups--split {
  grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
}
.image-example__group {
  border-left: 3px solid var(--accent, #76b900);
  padding-left: 0.65rem;
}
.image-example__group-label {
  display: block;
  margin: 0 0 0.3rem;
  color: var(--accent, #76b900);
  font-size: 0.7rem;
  font-weight: 700;
  letter-spacing: 0.06em;
  line-height: 1.2;
  text-transform: uppercase;
}
.image-example__chips {
  column-count: 2;
  column-gap: 0.36rem;
  margin: 0;
  padding: 0;
  list-style: none;
}
.image-example__control-groups--split .image-example__chips {
  column-count: 2;
}
.image-example__chip {
  display: inline-block;
  width: 100%;
  min-width: 0;
  margin: 0 0 0.2rem !important;
  padding: 0.34rem 0.45rem;
  border: 1px solid var(--grayscale-a5, rgba(128, 128, 128, 0.18));
  border-radius: 8px;
  background: var(--grayscale-2, rgba(128, 128, 128, 0.04));
  color: inherit;
  line-height: 1.22;
  break-inside: avoid;
}
.image-example__key {
  display: block;
  margin: 0 0 0.12rem;
  color: var(--accent, #76b900);
  font-size: 0.61rem;
  font-weight: 700;
  letter-spacing: 0.04em;
  line-height: 1.2;
  text-transform: uppercase;
}
.image-example__value {
  display: block;
  font-size: 0.76rem;
  overflow-wrap: anywhere;
}
.image-example__lightbox {
  display: none;
  position: fixed;
  inset: 0;
  z-index: 9999;
  align-items: center;
  justify-content: center;
  padding: 2rem;
}
.image-example__lightbox:target {
  display: flex;
}
.image-example__lightbox-backdrop {
  position: absolute;
  inset: 0;
  background: rgba(0, 0, 0, 0.82);
}
.image-example__lightbox-panel {
  position: relative;
  z-index: 1;
  max-width: min(94vw, 1200px);
  max-height: 90vh;
}
.image-example__lightbox-image {
  display: block;
  max-width: 100%;
  max-height: 90vh;
  width: auto;
  height: auto;
  border-radius: 8px;
  background: #fff;
  box-shadow: 0 20px 60px rgba(0, 0, 0, 0.4);
}
.image-example__lightbox-close {
  position: absolute;
  top: -0.85rem;
  right: -0.85rem;
  display: flex;
  width: 2rem;
  height: 2rem;
  align-items: center;
  justify-content: center;
  border-radius: 999px;
  background: #fff;
  color: #111;
  font-size: 1.25rem;
  line-height: 1;
  text-decoration: none !important;
  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.32);
}
@media (max-width: 720px) {
  .image-example {
    padding: 0.55rem;
  }
  .image-example__chips,
  .image-example__control-groups--split .image-example__chips {
    column-count: 1;
  }
  .image-example__image-link {
    max-width: 100% !important;
  }
  .image-example__lightbox {
    padding: 1rem;
  }
}
`;

/** A labelled set of key/value controls shown together under one heading. */
export interface ImageExampleControlGroup {
  /** Heading displayed for the group. */
  label: string;
  /** Ordered `[key, value]` pairs rendered for the group. */
  controls: [string, string][];
}

/** Props accepted by the `ImageExample` component. */
export interface ImageExampleProps {
  /** Caption text for the figure. */
  title: string;
  /** Image URL or path; root-relative paths get a base-path fallback. */
  src: string;
  /** Alternative text for the image. */
  alt: string;
  /** Optional width for the image (presumably a CSS length — confirm at the call site). */
  imageWidth?: string;
  /** Flat `[key, value]` list, used when `controlGroups` is absent or empty. */
  controls?: [string, string][];
  /** Grouped controls; takes precedence over `controls` when non-empty. */
  controlGroups?: ImageExampleControlGroup[];
}

/** Site base path prepended to root-relative asset paths. */
const BASEPATH = "/nemo/datadesigner";

/**
 * Prefix a root-relative path with {@link BASEPATH}.
 *
 * Returned unchanged: paths that do not start with "/" (relative paths and
 * absolute URLs such as "https://..."), protocol-relative URLs ("//host/..."),
 * and paths that already equal or sit under the base path.
 *
 * @param path - Path or URL to normalise.
 * @returns The path with the base path applied when applicable.
 */
function withBasepath(path: string): string {
  if (!path.startsWith("/") || path.startsWith("//")) return path;
  if (path === BASEPATH || path.startsWith(`${BASEPATH}/`)) return path;
  return `${BASEPATH}${path}`;
}

/**
 * Image `error` handler that retries once with the element's
 * `data-fallback-src` value.
 *
 * The parameter is typed structurally by what the handler actually touches,
 * so `dataset` and `src` type-check under `strict` (a bare
 * `React.SyntheticEvent` exposes `currentTarget` only as `Element`). A React
 * `SyntheticEvent<HTMLImageElement>` is assignable to this shape, so the
 * function can still be passed directly as an `<img>` `onError` handler.
 *
 * @param event - Event whose `currentTarget` is the failing image.
 */
function handleImageError(event: {
  currentTarget: Pick<HTMLImageElement, "dataset" | "src">;
}): void {
  const image = event.currentTarget;
  const fallbackSrc = image.dataset.fallbackSrc;

  if (!fallbackSrc) return;

  // Remove the attribute before switching sources so that a failing fallback
  // does not trigger another retry.
  delete image.dataset.fallbackSrc;
  image.src = fallbackSrc;
}

/**
 * Derive a lightbox identifier from an image source.
 *
 * Runs of characters outside `[a-zA-Z0-9_-]` collapse to a single hyphen,
 * leading and trailing hyphens are trimmed, and only the last 96 characters
 * of the slug are kept (the tail of a path is usually its most distinctive
 * part).
 *
 * @param src - Image source the identifier is derived from.
 * @returns An identifier of the form `image-example-<slug>`.
 */
function lightboxIdFor(src: string): string {
  const slug = src.replace(/[^a-zA-Z0-9_-]+/g, "-").replace(/^-+|-+$/g, "");
  return `image-example-${slug.slice(-96)}`;
}
+ {group.label} +
    + {group.controls.map(([key, value], controlIndex) => ( +
  • + {key} + {value} +
  • + ))} +
+
+ ); +} + +export const ImageExample = ({ + title, + src, + alt, + imageWidth, + controls = [], + controlGroups, +}: ImageExampleProps) => { + const groups = + controlGroups && controlGroups.length > 0 + ? controlGroups + : [{ label: "Sampler controls", controls }]; + const fallbackSrc = withBasepath(src); + const hasFallback = fallbackSrc !== src; + const lightboxId = lightboxIdFor(src); + const imageFallbackProps = hasFallback + ? { "data-fallback-src": fallbackSrc, onError: handleImageError } + : {}; + + return ( +
+ {/* static CSS string literal (no user input) — safe to inject as raw HTML */} +