diff --git a/.env.example b/.env.example index 1e10ade..87c261b 100644 --- a/.env.example +++ b/.env.example @@ -21,23 +21,37 @@ BASE_URL=http://localhost:8000 ALLOW_HTTP_SESSIONS=true # Slack — one pair per agent (Bot User OAuth Token + App-Level Token) +# Add as many agents as needed using this pattern; no code changes required. +# SLACK_BOT_TOKEN_=xoxb-... (required) +# SLACK_APP_TOKEN_=xapp-... (optional) SLACK_BOT_TOKEN_SU=xoxb-placeholder -SLACK_APP_TOKEN_SU=xapp-placeholder SLACK_BOT_TOKEN_WISEMAN=xoxb-placeholder -SLACK_APP_TOKEN_WISEMAN=xapp-placeholder -SLACK_BOT_TOKEN_LOTZ=xoxb-placeholder -SLACK_APP_TOKEN_LOTZ=xapp-placeholder -SLACK_BOT_TOKEN_CRAVATT=xoxb-placeholder -SLACK_APP_TOKEN_CRAVATT=xapp-placeholder -SLACK_BOT_TOKEN_GROTJAHN=xoxb-placeholder -SLACK_APP_TOKEN_GROTJAHN=xapp-placeholder -SLACK_BOT_TOKEN_PETRASCHECK=xoxb-placeholder -SLACK_APP_TOKEN_PETRASCHECK=xapp-placeholder -SLACK_BOT_TOKEN_KEN=xoxb-placeholder -SLACK_APP_TOKEN_KEN=xapp-placeholder -SLACK_BOT_TOKEN_RACKI=xoxb-placeholder -SLACK_APP_TOKEN_RACKI=xapp-placeholder -SLACK_BOT_TOKEN_SAEZ=xoxb-placeholder -SLACK_APP_TOKEN_SAEZ=xapp-placeholder -SLACK_BOT_TOKEN_WU=xoxb-placeholder -SLACK_APP_TOKEN_WU=xapp-placeholder +SLACK_BOT_TOKEN_GRANTBOT=xoxb-placeholder + +# Podcast TTS backend: "mistral" (default), "openai", or "local" (vLLM-Omni server) +PODCAST_TTS_BACKEND="mistral" + +# Mistral AI TTS (used when PODCAST_TTS_BACKEND=mistral) +MISTRAL_API_KEY=your-mistral-api-key +MISTRAL_TTS_MODEL=voxtral-mini-tts-latest +MISTRAL_TTS_DEFAULT_VOICE=your-voice-uuid + +# OpenAI TTS (used when PODCAST_TTS_BACKEND=openai) +# Voices: alloy echo fable onyx nova shimmer +# Models: tts-1 tts-1-hd gpt-4o-mini-tts +OPENAI_API_KEY=your-openai-api-key +OPENAI_TTS_MODEL=tts-1 +OPENAI_TTS_DEFAULT_VOICE=alloy + +# Local vLLM-Omni TTS server (used when PODCAST_TTS_BACKEND=local) +# Start with: vllm serve --port 8010 +LOCAL_TTS_HOST=127.0.0.1 +LOCAL_TTS_PORT=8008 +LOCAL_TTS_MODEL=mistralai/Voxtral-4B-TTS-2603 
+LOCAL_TTS_VOICE=default + +# Podcast +PODCAST_BASE_URL=http://localhost:8001 +PODCAST_SEARCH_WINDOW_DAYS=14 +PODCAST_MAX_CANDIDATES=50 +# PODCAST_NORMALIZE_AUDIO=true # uncomment to enable ffmpeg loudnorm post-processing (EBU R128, -16 LUFS) diff --git a/.gitignore b/.gitignore index aad82ec..342842f 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,9 @@ certbot/ .pytest_cache/ .coverage htmlcov/ + +# Runtime data (state files, generated audio — ephemeral) +data/ + +# Test output artifacts +.labbot-tests/ diff --git a/AGENT.md b/AGENT.md index a94b338..39628fc 100644 --- a/AGENT.md +++ b/AGENT.md @@ -32,6 +32,7 @@ All specs are in `/specs/`: - `profile-ingestion.md` — 9-step pipeline, ORCID → PubMed → PMC → LLM - `admin-dashboard.md` — read-only, server-rendered, impersonation - `agent-system.md` — Slack Bolt, Socket Mode, two-phase LLM calls, simulation engine +- `labbot-podcast.md` — daily personalized research briefing: PubMed search, LLM selection/summarization, Local or API TTS, Slack DM delivery, per-PI RSS podcast feed ## Tech Stack diff --git a/CLAUDE.md b/CLAUDE.md index 66a844b..4c8db5a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,29 @@ # CLAUDE.md +## Project Overview + +**coPI** is an AI-powered research collaboration discovery platform for academic PIs. 
It combines: + +- **Web app** (`src/routers/`, `templates/`) — FastAPI + Jinja2, ORCID OAuth login, profile editing, admin dashboard +- **Profile pipeline** (`src/services/`) — Ingests ORCID/PubMed data; Claude Opus synthesizes a public + private profile per researcher +- **Agent simulation** (`src/agent/`) — 12 AI Slack bots (one per pilot lab) that converse, identify synergies, and generate collaboration proposals in a turn-based 5-phase loop +- **Podcast pipeline** (`src/podcast/`) — Daily personalized research briefings via Slack DM + RSS feed with TTS audio +- **GrantBot** (`src/agent/grantbot.py`) — Fetches NIH/NSF FOAs, posts relevant ones to Slack channels +- **Background worker** (`src/worker/`) — PostgreSQL-backed job queue for profile generation and monthly refreshes + +**Stack:** Python/FastAPI, PostgreSQL + SQLAlchemy async, Anthropic Claude (Opus for profiles, Sonnet for agents), Slack Web API, Docker Compose, AWS (S3/SES). + +**Key patterns:** +- Public profiles exported to `profiles/public/` (disk markdown, agent-readable) +- Private profiles in `profiles/private/` (PI behavioral instructions, editable via web/DM) +- Agent working memory in `profiles/memory/` (updated post-simulation) +- All LLM calls logged to `LlmCallLog` table (model, tokens, latency, cost) +- Agent messages append-only in `MessageLog`; outcomes in `ThreadDecision`; PI ratings in `ProposalReview` +- Prompts are standalone files in `prompts/` — editable without code changes +- Specs for all subsystems in `specs/` + +**Pilot agents:** SuBot, WisemanBot, LotzBot, CravattBot, GrotjahnBot, PetrascheckBot, KenBot, RackiBot, SaezBot, WuBot, WardBot, BrineyBot + ## Testing Run `python -m pytest tests/ -v` before committing. All tests must pass. @@ -42,3 +66,50 @@ docker compose --profile agent run -d --name agent-run agent python -m src.agent ``` **Note:** The agent-run container uses mounted source code but the Python process only loads modules at startup. 
Code changes require a container restart to take effect. **After any code change that affects the running agent process, flag this to the user so they can decide whether to restart.** + +## Podcast Pipeline + +The LabBot Podcast pipeline (specs/labbot-podcast.md) runs daily at 9am UTC for each active agent: + +1. Build PubMed queries from lab's public profile +2. Fetch candidates from PubMed + bioRxiv + medRxiv + arXiv (last 14 days, up to 50+10 candidates) +3. Claude Sonnet selects most relevant paper (applying PI's podcast preferences from their private ProfileRevision) +4. Claude Opus writes a ~250-word structured brief +5. TTS audio generated (Mistral or local vLLM-Omni); ffmpeg loudnorm applied if PODCAST_NORMALIZE_AUDIO=true +6. Slack DM sent to PI with text summary + RSS link +7. RSS feed available at `/podcast/{agent_id}/feed.xml` +8. Audio served at `/podcast/{agent_id}/audio/{date}.mp3` + +Preprint IDs use prefixed format: `biorxiv:...`, `medrxiv:...`, `arxiv:...`. The `paper_url` in summaries links to the correct server (not always PubMed). + +```bash +# Run podcast pipeline once for all active agents +docker compose --profile podcast run --rm podcast python -m src.podcast.main + +# Test pipeline for 'su' agent only +docker compose exec app python scripts/test_podcast_su.py +``` + +## Database Migration Caveat + +If the DB was initialized from the `main` branch schema and then this branch is checked out, `alembic upgrade head` will stamp the version without re-running migrations that share a revision ID with ones already applied on `main`. Any columns added by branch-specific migrations may be silently missing. + +**Symptom:** `UndefinedColumnError` at runtime despite `alembic current` showing `head`. 
+ +**Fix:** Check for missing columns and apply them manually: +```bash +docker compose exec app python -c " +import asyncio +from src.database import get_engine +from sqlalchemy import text + +async def check(): + eng = get_engine() + async with eng.connect() as conn: + result = await conn.execute(text(\"SELECT column_name FROM information_schema.columns WHERE table_name='researcher_profiles' ORDER BY ordinal_position\")) + print([r[0] for r in result]) + +asyncio.run(check()) +" +``` +Then add any missing columns with `ALTER TABLE ... ADD COLUMN IF NOT EXISTS ...`. diff --git a/Dockerfile b/Dockerfile index c032e95..63a7b94 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ libpq-dev \ + ffmpeg \ && rm -rf /var/lib/apt/lists/* # Install Python dependencies diff --git a/alembic/versions/0010_access_gate_and_waitlist.py b/alembic/versions/0010_access_gate_and_waitlist.py index 36c0ec6..79cb165 100644 --- a/alembic/versions/0010_access_gate_and_waitlist.py +++ b/alembic/versions/0010_access_gate_and_waitlist.py @@ -1,7 +1,7 @@ """Access gate + waitlist -Revision ID: 0010 -Revises: 0009 +Revision ID: 0010a +Revises: 0010 Create Date: 2026-04-15 00:00:00.000000 """ @@ -13,8 +13,8 @@ from alembic import op -revision: str = "0010" -down_revision: Union[str, None] = "0009" +revision: str = "0010a" +down_revision: Union[str, None] = "0010" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None diff --git a/alembic/versions/0010_add_podcast_episodes.py b/alembic/versions/0010_add_podcast_episodes.py new file mode 100644 index 0000000..adad7d2 --- /dev/null +++ b/alembic/versions/0010_add_podcast_episodes.py @@ -0,0 +1,56 @@ +"""Add podcast_episodes table + +Revision ID: 0010 +Revises: 0009 +Create Date: 2026-04-09 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import 
postgresql + +from alembic import op + +revision: str = "0010" +down_revision: Union[str, None] = "0009" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "podcast_episodes", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("agent_id", sa.String(50), nullable=False), + sa.Column("episode_date", sa.Date, nullable=False), + sa.Column("pmid", sa.String(100), nullable=False), + sa.Column("paper_title", sa.String(500), nullable=False), + sa.Column("paper_authors", sa.String(500), nullable=False), + sa.Column("paper_journal", sa.String(255), nullable=False), + sa.Column("paper_year", sa.Integer, nullable=False), + sa.Column("text_summary", sa.Text, nullable=False), + sa.Column("audio_file_path", sa.String(500), nullable=True), + sa.Column("audio_duration_seconds", sa.Integer, nullable=True), + sa.Column("slack_delivered", sa.Boolean, nullable=False, server_default="false"), + sa.Column("selection_justification", sa.Text, nullable=False), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + nullable=False, + ), + ) + op.create_index("ix_podcast_episodes_agent_id", "podcast_episodes", ["agent_id"]) + op.create_index("ix_podcast_episodes_episode_date", "podcast_episodes", ["episode_date"]) + op.create_unique_constraint( + "uq_podcast_agent_date", "podcast_episodes", ["agent_id", "episode_date"] + ) + + +def downgrade() -> None: + op.drop_constraint("uq_podcast_agent_date", "podcast_episodes") + op.drop_index("ix_podcast_episodes_episode_date") + op.drop_index("ix_podcast_episodes_agent_id") + op.drop_table("podcast_episodes") diff --git a/alembic/versions/0011_add_podcast_paper_url.py b/alembic/versions/0011_add_podcast_paper_url.py new file mode 100644 index 0000000..5b2aa8f --- /dev/null +++ b/alembic/versions/0011_add_podcast_paper_url.py @@ -0,0 +1,29 @@ +"""Add paper_url column to 
podcast_episodes + +Revision ID: 0011 +Revises: 0010 +Create Date: 2026-04-10 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +revision: str = "0011" +down_revision: Union[str, None] = "0010a" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "podcast_episodes", + sa.Column("paper_url", sa.String(1000), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("podcast_episodes", "paper_url") diff --git a/alembic/versions/0012_add_podcast_preferences.py b/alembic/versions/0012_add_podcast_preferences.py new file mode 100644 index 0000000..bba69c7 --- /dev/null +++ b/alembic/versions/0012_add_podcast_preferences.py @@ -0,0 +1,64 @@ +"""Add podcast_preferences table + +Revision ID: 0012 +Revises: 0011 +Create Date: 2026-04-14 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy.dialects.postgresql import ARRAY + +from alembic import op + +revision: str = "0012" +down_revision: Union[str, None] = "0011" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "podcast_preferences", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("agent_id", sa.String(50), nullable=False), + sa.Column("voice_id", sa.String(100), nullable=True), + sa.Column( + "extra_keywords", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "preferred_journals", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "deprioritized_journals", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + op.create_index( + 
"ix_podcast_preferences_agent_id", + "podcast_preferences", + ["agent_id"], + unique=True, + ) + + +def downgrade() -> None: + op.drop_index("ix_podcast_preferences_agent_id", table_name="podcast_preferences") + op.drop_table("podcast_preferences") diff --git a/alembic/versions/0013_podcast_user_support.py b/alembic/versions/0013_podcast_user_support.py new file mode 100644 index 0000000..89d77cd --- /dev/null +++ b/alembic/versions/0013_podcast_user_support.py @@ -0,0 +1,83 @@ +"""Extend podcast tables to support plain ORCID users (no agent required) + +Adds nullable user_id FK to podcast_preferences and podcast_episodes so that +any user who has completed onboarding can receive daily research briefings +without needing an approved AgentRegistry entry. + +Changes: + - podcast_preferences.agent_id: NOT NULL → nullable + - podcast_preferences.user_id: new nullable FK → users.id, unique index + - podcast_episodes.agent_id: NOT NULL → nullable + - podcast_episodes.user_id: new nullable FK → users.id + - podcast_episodes: partial unique index on (user_id, episode_date) WHERE user_id IS NOT NULL + +Revision ID: 0013 +Revises: 0012 +Create Date: 2026-04-14 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from alembic import op + +revision: str = "0013" +down_revision: Union[str, None] = "0012" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # --- podcast_preferences --- + # Make agent_id nullable (existing agent rows keep their values) + op.alter_column("podcast_preferences", "agent_id", nullable=True) + + # Add user_id FK column + op.add_column( + "podcast_preferences", + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=True, + ), + ) + op.create_index( + "ix_podcast_preferences_user_id", + "podcast_preferences", + ["user_id"], + 
unique=True, + ) + + # --- podcast_episodes --- + # Make agent_id nullable (existing agent rows keep their values) + op.alter_column("podcast_episodes", "agent_id", nullable=True) + + # Add user_id FK column + op.add_column( + "podcast_episodes", + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=True, + ), + ) + # Partial unique index: one episode per user per day (only when user_id is set) + op.execute( + "CREATE UNIQUE INDEX ix_podcast_episodes_user_date " + "ON podcast_episodes (user_id, episode_date) " + "WHERE user_id IS NOT NULL" + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_podcast_episodes_user_date") + op.drop_column("podcast_episodes", "user_id") + op.alter_column("podcast_episodes", "agent_id", nullable=False) + + op.drop_index("ix_podcast_preferences_user_id", table_name="podcast_preferences") + op.drop_column("podcast_preferences", "user_id") + op.alter_column("podcast_preferences", "agent_id", nullable=False) diff --git a/alembic/versions/0014_add_matchmaker_proposals.py b/alembic/versions/0014_add_matchmaker_proposals.py new file mode 100644 index 0000000..ea31eb4 --- /dev/null +++ b/alembic/versions/0014_add_matchmaker_proposals.py @@ -0,0 +1,57 @@ +"""Add matchmaker_proposals table + +Revision ID: 0014 +Revises: 0013 +Create Date: 2026-04-21 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from alembic import op + +revision: str = "0014" +down_revision: Union[str, None] = "0013" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "matchmaker_proposals", + sa.Column("id", UUID(as_uuid=True), primary_key=True), + sa.Column( + "pi_a_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "pi_b_id", + UUID(as_uuid=True), + 
sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("proposal_md", sa.Text, nullable=False), + sa.Column("title", sa.String(500), nullable=False), + sa.Column("confidence", sa.String(20), nullable=False), + sa.Column("llm_model", sa.String(100), nullable=False), + sa.Column("input_tokens", sa.Integer, nullable=True), + sa.Column("output_tokens", sa.Integer, nullable=True), + sa.Column( + "generated_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + nullable=False, + ), + ) + op.create_index("ix_matchmaker_proposals_pi_a_id", "matchmaker_proposals", ["pi_a_id"]) + op.create_index("ix_matchmaker_proposals_pi_b_id", "matchmaker_proposals", ["pi_b_id"]) + + +def downgrade() -> None: + op.drop_index("ix_matchmaker_proposals_pi_b_id", table_name="matchmaker_proposals") + op.drop_index("ix_matchmaker_proposals_pi_a_id", table_name="matchmaker_proposals") + op.drop_table("matchmaker_proposals") diff --git a/alembic/versions/0015_matchmaker_nullable_ids_and_names.py b/alembic/versions/0015_matchmaker_nullable_ids_and_names.py new file mode 100644 index 0000000..cf7b95f --- /dev/null +++ b/alembic/versions/0015_matchmaker_nullable_ids_and_names.py @@ -0,0 +1,30 @@ +"""Make matchmaker PI FKs nullable; add pi_a_name / pi_b_name for CLI path + +Revision ID: 0015 +Revises: 0014 +Create Date: 2026-04-22 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "0015" +down_revision: Union[str, None] = "0014" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column("matchmaker_proposals", "pi_a_id", nullable=True) + op.alter_column("matchmaker_proposals", "pi_b_id", nullable=True) + op.add_column("matchmaker_proposals", sa.Column("pi_a_name", sa.String(255), nullable=True)) + op.add_column("matchmaker_proposals", sa.Column("pi_b_name", sa.String(255), nullable=True)) + 
+ +def downgrade() -> None: + op.drop_column("matchmaker_proposals", "pi_b_name") + op.drop_column("matchmaker_proposals", "pi_a_name") + op.alter_column("matchmaker_proposals", "pi_b_id", nullable=False) + op.alter_column("matchmaker_proposals", "pi_a_id", nullable=False) diff --git a/code_review.md b/code_review.md new file mode 100644 index 0000000..fbf0a1d --- /dev/null +++ b/code_review.md @@ -0,0 +1,290 @@ +# Code Review: Top 5 Priority Issues + +Reviewed: 2026-04-14 +Branch: `coPI-podcast` + +--- + +## Issue 1 — CSRF Bypass on Expired OAuth Session + +**File:** `src/routers/auth.py:76-79` +**Severity:** High (security) + +### Current Code + +```python +stored_state = request.session.pop("oauth_state", None) +if stored_state and state != stored_state: + logger.warning("OAuth state mismatch") + return RedirectResponse(url="/login?error=state_mismatch", status_code=302) +``` + +### Problem + +The guard condition is `if stored_state and ...`, meaning it only enforces the check when `stored_state` is truthy. If the user's session has expired (or was never set), `stored_state` is `None` and the entire check is skipped — any `state` value (including `None`) passes through. A CSRF attacker can initiate an OAuth flow, let the victim's session expire, then replay the callback with an arbitrary code. + +### Best Practice + +Per [RFC 6749 §10.12](https://datatracker.ietf.org/doc/html/rfc6749#section-10.12) and OWASP OAuth guidelines, the `state` parameter must be treated as a **required, non-nullable nonce**. The correct pattern is to reject the callback if `stored_state` is missing (session expired), not to treat it as a pass condition. 
+ +### How to Fix + +Change the condition from a two-branch `if stored_state and ...` guard to an explicit three-case rejection: + +```python +stored_state = request.session.pop("oauth_state", None) + +if stored_state is None: + # Session expired before the callback arrived — cannot verify CSRF nonce + logger.warning("OAuth callback with no stored state (session expired or missing)") + return RedirectResponse(url="/login?error=session_expired", status_code=302) + +if state != stored_state: + logger.warning("OAuth state mismatch — possible CSRF attempt") + return RedirectResponse(url="/login?error=state_mismatch", status_code=302) +``` + +Also ensure the state nonce is generated with sufficient entropy. In `src/routers/auth.py` (in the `/login` route that initiates the flow), use `secrets.token_urlsafe(32)` rather than any shorter or predictable token, and store it in the session immediately before the redirect. + +--- + +## Issue 2 — Budget Enforcement Exits the Entire Simulation Loop + +**File:** `src/agent/simulation.py:218-222` +**Severity:** Medium (reliability / correctness) + +### Current Code + +```python +agent = self._select_agent() +if not agent or not self._agent_within_budget(agent): + # All agents over budget + logger.info("All agents over budget or no agent selected. Stopping.") + break +``` + +### Problem + +`_select_agent()` returns whichever agent is next in the rotation. If that specific agent is over budget, the entire simulation `break`s — even if every other agent still has budget remaining. The log comment says "All agents over budget" but that is only true in the case where `_select_agent` returns `None`; when it returns an agent that is individually over budget, the others are never checked. + +### Best Practice + +Budget exhaustion for a single agent should be a **skip**, not a **halt**. The loop should continue cycling through agents until every agent is either over budget or no agent can be selected at all. 
A common pattern is to track how many consecutive agents have been skipped and stop only when the skip count equals the total number of agents. + +### How to Fix + +Separate the two exit conditions and convert the over-budget case from `break` to `continue`. Count consecutive over-budget skips and only exit the loop when all agents have been skipped in a single pass: + +```python +over_budget_streak = 0 +total_agents = len(self._agents) + +while True: + agent = self._select_agent() + if not agent: + logger.info("No agent selected — simulation complete.") + break + + if not self._agent_within_budget(agent): + over_budget_streak += 1 + agent.state.last_selected = time.time() + if over_budget_streak >= total_agents: + logger.info("All agents over budget. Stopping.") + break + logger.debug("[%s] Over budget, skipping.", agent.agent_id) + continue + + over_budget_streak = 0 # reset when a valid agent is found + # ... rest of the turn logic +``` + +This requires that `_select_agent` rotates through agents based on `last_selected` time (which it already does), so agents that have been skipped will be picked up again on the next cycle. + +--- + +## Issue 3 — RSS Feed Served with Missing Audio File + +**File:** `src/podcast/main.py:89-103`, `src/podcast/pipeline.py` +**Severity:** Medium (reliability) + +### Current Code + +```python +try: + ok = await run_pipeline_for_agent( + agent_id=agent_id, + ... + ) + if ok: + produced.append(agent_id) +except Exception as exc: + logger.error( + "Pipeline failed for agent %s: %s", agent_id, exc, exc_info=True + ) +``` + +### Problem + +`run_pipeline_for_agent` returns a boolean `ok`, but within the pipeline itself the episode DB record and RSS entry can be written before the TTS step completes. If TTS fails, the audio file does not exist, but the feed already contains an `<enclosure>` pointing to a non-existent MP3. Any podcast client that subscribed to the feed will attempt a GET on a 404 URL and may display a broken episode permanently. 
+ +### Best Practice + +The pipeline should follow a **commit-last** pattern: write the episode record and RSS enclosure only after all assets are confirmed present on disk. This is the same pattern used in video/audio platforms (e.g., YouTube's upload pipeline) — metadata is published only after the binary asset is available. + +### How to Fix + +Inside `src/podcast/pipeline.py`, restructure the steps in this order: + +1. Fetch and select the paper (read-only, safe to do first). +2. Generate the text brief (Claude Opus call). +3. Call TTS and write the audio file to disk. **Capture the returned path.** +4. Verify the audio file exists and has a non-zero size (`path.stat().st_size > 0`) before proceeding. +5. Only if step 4 passes: write the `PodcastEpisode` DB row and call `db_session.flush()`. +6. Only after the DB row is committed: build and write the RSS `<enclosure>`. + +If TTS fails at step 3, log the error and return `ok=False` without writing anything to the DB or RSS. The caller in `main.py` already handles `ok=False` correctly; the gap is in the pipeline not propagating TTS failures as `False`. + +As a secondary safeguard, the RSS endpoint (`/podcast/{agent_id}/feed.xml`) should check whether `data/podcast_audio/{agent_id}/{date}.mp3` exists before including the `<enclosure>` element in its output. This prevents any historical DB rows with missing audio from appearing in the feed. + +--- + +## Issue 4 — Non-Atomic File Writes for Profile and Podcast State + +**Files:** `src/agent/agent.py:423-444`, `src/podcast/state.py:22-24` +**Severity:** Medium (data integrity) + +### Current Code + +```python +# agent.py +memory_path.write_text(new_memory + "\n", encoding="utf-8") + +# state.py +def _save(data: dict) -> None: + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + STATE_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8") +``` + +### Problem + +`Path.write_text` is not atomic — it opens the file for truncation and writes in multiple OS-level operations. 
If the process crashes, is killed, or two coroutines call the write concurrently, the file can be left in a partially written state (empty, or with truncated JSON). For `podcast_state.json`, this means the `delivered_pmids` list can be lost, causing duplicate Slack DMs. For working memory files, a partial write silently discards the agent's accumulated context. + +There is also a logical race: `_save` in `state.py` does a read-modify-write cycle (`_load()` → modify → `_save()`). Two concurrent podcast pipeline runs (possible if the scheduler is invoked twice) will both read the same initial state, both modify it independently, and whichever writes last will silently overwrite the other's changes. + +### Best Practice + +The standard pattern for atomic file writes on POSIX systems is **write to a temp file, then `os.rename`**. Because `rename` is guaranteed atomic by the POSIX spec (it is a single syscall), a reader will always see either the old complete file or the new complete file — never a partial write. Python's `tempfile.NamedTemporaryFile` with `delete=False` in the same directory is the standard way to achieve this. + +For the read-modify-write race in `state.py`, use a `threading.Lock` (or `asyncio.Lock` if the callers are async) as a process-level mutex around all load/save operations. 
+ +### How to Fix + +**Atomic write helper** (can live in `src/utils.py` or inline in each module): + +```python +import os +import tempfile +from pathlib import Path + +def atomic_write_text(path: Path, content: str, encoding: str = "utf-8") -> None: + """Write `content` to `path` atomically using a temp-file + rename.""" + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(dir=path.parent, suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding=encoding) as f: + f.write(content) + os.replace(tmp, path) # atomic on POSIX; overwrites destination + except Exception: + os.unlink(tmp) # clean up temp file on any error + raise +``` + +Replace all four `path.write_text(...)` calls in `agent.py` (lines 428 and 441) and `state.py` (line 24) with `atomic_write_text(path, content)`. + +**Lock for state.py read-modify-write:** + +```python +import threading +_STATE_LOCK = threading.Lock() + +def record_delivery(agent_id: str, pmid: str) -> None: + with _STATE_LOCK: + data = _load() + # ... modify ... + _save(data) # now uses atomic_write_text internally + +def mark_run_complete() -> None: + with _STATE_LOCK: + data = _load() + data["last_run_date"] = ... + _save(data) +``` + +**Note:** if these functions are ever called from async context across multiple event-loop threads (e.g., concurrent `run_pipeline_for_agent` calls), a `threading.Lock` is sufficient because `asyncio.run` uses a single thread per call. If concurrency is ever introduced via `asyncio.gather`, switch to `asyncio.Lock`. 
+ +--- + +## Issue 5 — Per-Task Failures Silently Discarded in `asyncio.gather` + +**File:** `src/agent/simulation.py:632-637` +**Severity:** Low-Medium (observability / silent failure) + +### Current Code + +```python +tasks = [ + self._reply_to_thread(agent, thread) + for thread in threads_to_reply +] +await asyncio.gather(*tasks, return_exceptions=True) +``` + +### Problem + +`return_exceptions=True` causes `asyncio.gather` to return exceptions as result values instead of re-raising them. The return value here is discarded entirely, so any exceptions from individual `_reply_to_thread` calls are silently swallowed. If a Slack API error, DB write failure, or Claude API timeout occurs in any thread reply, it is invisible in logs and metrics. Operators have no signal that Phase 4 is partially or fully failing. + +### Best Practice + +When using `return_exceptions=True` the caller **must** inspect the results. The canonical pattern is to iterate the results list and log (or re-raise) any values that are `isinstance(r, BaseException)`. This is preferable to removing `return_exceptions=True` (which would cancel all remaining tasks on the first failure) because Phase 4 replies are independent — a failure on one thread should not prevent replies to others. + +### How to Fix + +Capture the return value of `asyncio.gather` and inspect each result: + +```python +results = await asyncio.gather(*tasks, return_exceptions=True) + +for thread, result in zip(threads_to_reply, results): + if isinstance(result, BaseException): + logger.error( + "[%s] Phase 4: Failed to reply to thread %s: %s", + agent.agent_id, + thread.thread_id, + result, + exc_info=result, # includes traceback in log record + ) +``` + +This pattern is appropriate anywhere `asyncio.gather(..., return_exceptions=True)` is used without inspecting results. There is a similar call site in `src/agent/simulation.py` for channel scanning — apply the same pattern there. 
Consider extracting a small helper: + +```python +async def gather_logged(tasks: list, label: str) -> list: + """gather with return_exceptions=True, logging each failure.""" + results = await asyncio.gather(*tasks, return_exceptions=True) + for i, r in enumerate(results): + if isinstance(r, BaseException): + logger.error("%s task[%d] failed: %s", label, i, r, exc_info=r) + return results +``` + +--- + +## Summary Table + +| # | File | Line(s) | Severity | Category | +|---|------|---------|----------|----------| +| 1 | `src/routers/auth.py` | 76-79 | High | Security — CSRF bypass | +| 2 | `src/agent/simulation.py` | 218-222 | Medium | Correctness — premature loop exit | +| 3 | `src/podcast/pipeline.py` + `main.py` | pipeline write order | Medium | Reliability — broken RSS enclosure | +| 4 | `src/agent/agent.py` + `src/podcast/state.py` | 428, 441, 22-24 | Medium | Data integrity — non-atomic writes | +| 5 | `src/agent/simulation.py` | 637 | Low-Medium | Observability — silent task failures | diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 44dc726..3c0c371 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -35,6 +35,7 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts + - podcast_data:/app/data depends_on: postgres: condition: service_healthy @@ -83,7 +84,6 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts - - ./data:/app/data depends_on: postgres: condition: service_healthy @@ -108,7 +108,7 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts - - ./data:/app/data + - grantbot_data:/app/data depends_on: postgres: condition: service_healthy @@ -120,6 +120,29 @@ services: awslogs-create-group: "true" awslogs-region: ${AWS_REGION:-us-east-2} + podcast: + build: + context: . 
+ restart: unless-stopped + command: ["python", "-m", "src.podcast.main", "scheduler", "--run-hour", "9"] + env_file: .env + environment: + DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-copi}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-copi} + volumes: + - ./profiles:/app/profiles + - ./prompts:/app/prompts + - podcast_data:/app/data + depends_on: + postgres: + condition: service_healthy + logging: + driver: awslogs + options: + awslogs-group: /copi/podcast + tag: podcast + awslogs-create-group: "true" + awslogs-region: ${AWS_REGION:-us-east-2} + nginx: image: nginx:1.27-alpine restart: unless-stopped @@ -167,3 +190,5 @@ services: volumes: pgdata: + grantbot_data: + podcast_data: diff --git a/docker-compose.yml b/docker-compose.yml index d686043..71d3fd9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,7 @@ services: - .:/app - ./profiles:/app/profiles - ./prompts:/app/prompts + - ./data:/app/data depends_on: postgres: condition: service_healthy @@ -69,5 +70,25 @@ services: postgres: condition: service_healthy + podcast: + build: . + command: python -m src.podcast.main scheduler --run-hour 9 + env_file: .env + environment: + # Override LOCAL_TTS_HOST so the container can reach a vLLM-Omni server + # running on the host machine (127.0.0.1 does not reach the host from inside Docker). + LOCAL_TTS_HOST: host.docker.internal + extra_hosts: + # Ensures host.docker.internal resolves on Linux (Docker Desktop sets it automatically on Mac/Windows). 
+ - "host.docker.internal:host-gateway" + volumes: + - .:/app + - ./profiles:/app/profiles + - ./prompts:/app/prompts + - ./data:/app/data + depends_on: + postgres: + condition: service_healthy + volumes: pgdata: diff --git a/prompts/agent-system.md b/prompts/agent-system.md index 37f033b..1aecf97 100644 --- a/prompts/agent-system.md +++ b/prompts/agent-system.md @@ -1,9 +1,10 @@ # Agent System Prompt -You are an AI agent representing a research lab at Scripps Research in a Slack workspace called "labbot". +You are an AI agent representing a research lab at Scripps Research in a Slack workspace. Your role is to facilitate scientific collaboration by engaging authentically with other lab agents. All agents represent real labs with real researchers — your goal is to identify genuinely valuable collaboration opportunities, not to generate noise. +Your task is to produce a high-quality collaboration proposal that follows the Proposal Generation Rules and meets the listed quality standards by engaging in dialogue between agents. You have access to each PI's public profile associated with the user (or profiles in profiles/public), private instructions (profiles in profiles/private), and recent relevant publications. Use all of this to initiate conversations with the ultimate goal of generating a specific, grounded, and actionable proposal after sufficient discussion. ## Core Rules @@ -19,84 +20,9 @@ collaboration opportunities, not to generate noise. 4. **DM rules.** You may DM your own PI to report on discussions or ask for guidance. You cannot DM other labs' PIs or send agent-to-agent DMs. -## Collaboration Quality Standards +## Proposal Generation Rules -These standards apply to every collaboration idea you propose or explore. Your PI's private instructions -may adjust these defaults — always follow PI instructions when they conflict. - -### Core Principles - -1. 
**Specificity.** Every collaboration idea must name specific techniques, models, reagents, datasets, - or expertise from each lab's profile. "Lab A's expertise in X" is not enough — say what specifically - they would do and with what. - -2. **True complementarity.** Each lab must bring something the other doesn't have. If either lab's - contribution could be described as a generic service (e.g., "computational analysis", "structural studies", - "mouse behavioral testing") without reference to the specific scientific question, the idea is too generic. - -3. **Concrete first experiment.** Any collaboration that advances beyond initial interest must include - a proposed first experiment scoped to days-to-weeks of effort. The experiment must name specific assays, - computational methods, reagents, or datasets. "We would analyze the data" is not a first experiment. - -4. **Silence over noise.** If you cannot articulate what makes this collaboration better than either lab - hiring a postdoc to do the other's part, do not propose it. - -5. **Non-generic benefits.** Both labs must benefit in ways specific to the collaboration. "Access to - new techniques" is too vague. "Structural evidence for the mechanism of mitochondrial rescue at - nanometer resolution, strengthening the therapeutic narrative for HRI activators" is specific. - -### Confidence Labels - -When you propose a collaboration, label your confidence level: -- *[High]* — Clear complementarity, specific anchoring to recent work, concrete first experiment, - both sides benefit non-generically -- *[Moderate]* — Good synergy but first experiment is less defined, or one side's benefit is less clear -- *[Speculative]* — Interesting angle but requires more development — use "This is speculative, but..." 
- -### Examples of Good Collaboration Ideas - -**Good: Specific question, specific contributions, concrete experiment** -> Wiseman's HRI activators induce mitochondrial elongation in MFN2-deficient cells, but the ultrastructural -> basis is unknown. Grotjahn's cryo-ET and Surface Morphometrics pipeline could directly visualize this -> remodeling at nanometer resolution. First experiment: Wiseman provides treated vs untreated MFN2-deficient -> fibroblasts, Grotjahn runs cryo-FIB-SEM and cryo-ET on both conditions, quantifying cristae morphology -> and membrane contact site metrics. - -**Good: Each lab has something the other literally cannot do alone** -> Petrascheck's atypical tetracyclines provide neuroprotection via ISR-independent ribosome targeting. -> Wiseman's HRI activators work through ISR-dependent pathways. Neither lab can test the combination alone. -> First experiment: mix compounds in neuronal ferroptosis assays, measure survival, calculate combination -> indices for synergy. - -**Good: Computational contribution is specific, not generic** -> Lotz's JCI paper identified cyproheptadine as an H1R inverse agonist activating FoxO in chondrocytes, -> but the structural basis for FoxO activation vs antihistamine activity is unknown. Su's BioThings -> knowledge graph could identify additional H1R ligands with FoxO activity data across multiple -> orthogonal datasets. First experiment: Lotz provides 10-15 H1R ligands with FoxO activity data, -> Su runs BioThings traversal to identify structural and mechanistic correlates from published datasets. - -### Examples of Bad Collaboration Ideas (do not propose these) - -**Bad: Descriptive imaging without leverage** -> "Grotjahn could use cryo-ET to visualize disc matrix degeneration in Lotz samples." — This may -> generate interesting images, but it is mostly descriptive. It does not clearly unlock a mechanistic -> bottleneck, therapeutic decision, or scalable downstream program. 
- -**Bad: Mechanistic depth without an intervention path** -> "A chromatin-focused collaboration could add mechanistic depth to disc regeneration work." — This -> sounds sophisticated, but it is not tied to a clear intervention strategy or near-term decision. - -**Bad: Incremental validation of an already-supported pathway** -> "Petrascheck could test the FoxO-H1R pathway in C. elegans aging assays." — Orthogonal validation -> alone is not enough if it only incrementally confirms a pathway that is already fairly well supported. - -**Bad: Generic screening in an overused model** -> "Run a high-throughput screen for FoxO activators in a C. elegans aging model." — A screen is not -> automatically compelling if the assay class is overused and the proposal lacks a distinctive hypothesis. - -**Bad: Novel but still low-leverage imaging** -> "Use cryo-ET to compare the chondrocyte-matrix interface in OA versus control samples." — Novelty -> and visual appeal are not sufficient without mechanistic or translational leverage. +{{include: colab-proposal-rules.md}} ## Communication Style @@ -163,13 +89,7 @@ Every thread must reach one of two outcomes: **Outcome 1: Collaboration Proposal** (rare — only the best ideas) -Post a `:memo: Summary` reply containing: -- **What each lab brings** (specific techniques, reagents, datasets — not generic capabilities) -- **The specific scientific question** being addressed -- **A concrete first experiment** scoped to days-to-weeks, naming specific assays/methods/reagents, - requiring modest effort from both sides -- **Why this collaboration is better** than either lab doing it independently -- **Confidence label** ([High], [Moderate], or [Speculative]) +Generate a proposal conforming to the "Proposal Generation Rules" and output format The other agent confirms agreement by replying with ✅. 
diff --git a/prompts/colab-proposal-rules.md b/prompts/colab-proposal-rules.md new file mode 100644 index 0000000..a158bca --- /dev/null +++ b/prompts/colab-proposal-rules.md @@ -0,0 +1,125 @@ +## Collaboration Quality Standards + +These standards apply to every collaboration proposal. PI private instructions may adjust these +defaults — always follow PI instructions when they conflict. + +### Core Principles + +1. **Specificity.** Every collaboration idea must name specific techniques, models, reagents, datasets, + or expertise from each lab's profile. "Lab A's expertise in X" is not enough — say what specifically + they would do and with what. + +2. **True complementarity.** Each lab must bring something the other doesn't have. If either lab's + contribution could be described as a generic service (e.g., "computational analysis", "structural + studies", "mouse behavioral testing") without reference to the specific scientific question, the + idea is too generic. + +3. **Concrete first experiment.** Any collaboration proposal must include a first experiment scoped + to days-to-weeks of effort. The experiment must name specific assays, computational methods, + reagents, or datasets. "We would analyze the data" is not a first experiment. + +4. **Silence over noise.** If you cannot articulate what makes this collaboration better than either + lab hiring a postdoc to do the other's part, do not propose it. + +5. **Non-generic benefits.** Both labs must benefit in ways specific to the collaboration. "Access to + new techniques" is too vague. "Structural evidence for the mechanism of mitochondrial rescue at + nanometer resolution, strengthening the therapeutic narrative for HRI activators" is specific. 
+ +### Confidence Labels + +- **High** — Clear complementarity, specific anchoring to recent work, concrete first experiment, + both sides benefit non-generically +- **Moderate** — Good synergy but first experiment is less defined, or one side's benefit is less clear +- **Speculative** — Interesting angle but requires more development — label sections accordingly + +### Examples of Good Collaboration Ideas + +**Good: Specific question, specific contributions, concrete experiment** +> Wiseman's HRI activators induce mitochondrial elongation in MFN2-deficient cells, but the ultrastructural +> basis is unknown. Grotjahn's cryo-ET and Surface Morphometrics pipeline could directly visualize this +> remodeling at nanometer resolution. First experiment: Wiseman provides treated vs untreated MFN2-deficient +> fibroblasts, Grotjahn runs cryo-FIB-SEM and cryo-ET on both conditions, quantifying cristae morphology +> and membrane contact site metrics. + +**Good: Each lab has something the other literally cannot do alone** +> Petrascheck's atypical tetracyclines provide neuroprotection via ISR-independent ribosome targeting. +> Wiseman's HRI activators work through ISR-dependent pathways. Neither lab can test the combination alone. +> First experiment: mix compounds in neuronal ferroptosis assays, measure survival, calculate combination +> indices for synergy. + +**Good: Computational contribution is specific, not generic** +> Lotz's JCI paper identified cyproheptadine as an H1R inverse agonist activating FoxO in chondrocytes, +> but the structural basis for FoxO activation vs antihistamine activity is unknown. Su's BioThings +> knowledge graph could identify additional H1R ligands with FoxO activity data across multiple +> orthogonal datasets. First experiment: Lotz provides 10–15 H1R ligands with FoxO activity data, +> Su runs BioThings traversal to identify structural and mechanistic correlates from published datasets. 
+ +### Examples of Bad Collaboration Ideas + +**Bad: Descriptive imaging without leverage** +> "Grotjahn could use cryo-ET to visualize disc matrix degeneration in Lotz samples." — This may +> generate interesting images, but it is mostly descriptive. It does not clearly unlock a mechanistic +> bottleneck, therapeutic decision, or scalable downstream program. + +**Bad: Mechanistic depth without an intervention path** +> "A chromatin-focused collaboration could add mechanistic depth to disc regeneration work." — This +> sounds sophisticated, but it is not tied to a clear intervention strategy or near-term decision. + +**Bad: Incremental validation of an already-supported pathway** +> "Petrascheck could test the FoxO-H1R pathway in C. elegans aging assays." — Orthogonal validation +> alone is not enough if it only incrementally confirms a pathway that is already fairly well supported. + +**Bad: Generic screening in an overused model** +> "Run a high-throughput screen for FoxO activators in a C. elegans aging model." — A screen is not +> automatically compelling if the assay class is overused and the proposal lacks a distinctive hypothesis. + +**Bad: Novel but still low-leverage imaging** +> "Use cryo-ET to compare the chondrocyte-matrix interface in OA versus control samples." — Novelty +> and visual appeal are not sufficient without mechanistic or translational leverage. + +--- + +## Instructions + +Produce ONE collaboration proposal between PI A and PI B using the output format below. + +- Apply the Collaboration Quality Standards strictly. +- Ground the proposal in specific publications, techniques, and findings from each profile. +- Respect each PI's private instructions when framing the proposal: if a PI has expressed preferences + for specific topics, partners, or collaboration styles, weight those angles positively. +- Do NOT quote or reveal any private instruction text verbatim in the output. 
+- If you cannot identify a High or Moderate confidence collaboration, produce the best Speculative + proposal you can and label it clearly. +- Wrap your entire proposal (and only the proposal) in `` tags. +- Any extra header information (e.g. Slack memos) can be placed before the `` tags but do not include any extra text or description about the generation (e.g. "Let me formulate the most compelling collaboration...") + +## Output Format + + +# [Collaboration Title — specific, not generic] +**[PI_A] + [PI_B]** [Timestamp] +**Confidence:** High | Moderate | Speculative + +## Scientific Rationale +[2–3 paragraphs. Why these two labs? What does each bring that the other lacks? Name specific +techniques, datasets, reagents, or model systems from recent publications.] + +## True Complementarity +- **PI A contributes:** [specific capabilities — not generic] +- **PI B contributes:** [specific capabilities — not generic] +- **Gap filled:** [what neither could do alone, stated precisely] + +## Concrete First Experiment +[1 paragraph. Scoped to days-to-weeks. Names specific assays, methods, reagents, or datasets. +Explains why both labs are essential to execute it.] + +## Benefits to Each Lab +- **PI A benefits:** [specific, non-generic — tied to their research goals] +- **PI B benefits:** [specific, non-generic — tied to their research goals] + +## Open Questions / Next Steps +- [Bullet list of what would need to be confirmed before committing effort] + +## Effort / Resources +- [Estimate low/med/high for each PI in terms of time, people, and resources required to achieve the goals of the proposal] + \ No newline at end of file diff --git a/prompts/matchmaker.md b/prompts/matchmaker.md new file mode 100644 index 0000000..96a1fe9 --- /dev/null +++ b/prompts/matchmaker.md @@ -0,0 +1,18 @@ +You are evaluating a potential research collaboration between two PIs. 
+ +Your task is to produce a high-quality collaboration proposal that follows the Proposal Generation Rules and meets the listed quality standards. You have access to each PI's public profile associated with the coPI user (or profiles in profiles/public), private instructions (profiles in profiles/private), and recent relevant publications. Use all of this to generate a specific, grounded, and actionable proposal. + +## Proposal Generation Rules + +{{include: colab-proposal-rules.md}} + +## Tools + +- **`retrieve_profile(agent_id)`** — Get another agent's public profile (techniques, publications, + research focus). Use this early before interrogating a proposal idea to understand the other lab's capabilities. +- **`retrieve_abstract(pmid_or_doi)`** — Fetch a paper's abstract from PubMed. Use this to check + specific claims or learn about cited work. No cap for your own lab's papers; up to 10 per potential collaboration idea + for other labs' papers. +- **`retrieve_full_text(pmid_or_doi)`** — Fetch full text from PubMed Central. Use sparingly — + up to 2 per promising proposal. Only use when the abstract isn't sufficient and the paper is central to a + potential collaboration. \ No newline at end of file diff --git a/prompts/phase4-thread-reply.md b/prompts/phase4-thread-reply.md index a1de54a..79632ea 100644 --- a/prompts/phase4-thread-reply.md +++ b/prompts/phase4-thread-reply.md @@ -81,6 +81,10 @@ you should already have the information you need. {instructions} +## Proposal Generation Rules + +{{include: colab-proposal-rules.md}} + ## Output Your final response MUST contain exactly one `` block. Everything inside @@ -95,12 +99,7 @@ Your message here — written as it should appear in Slack. You may think/reason freely outside the block, but ONLY the content between `` and `` tags will be posted. 
-If you are posting a :memo: Summary (collaboration proposal), format it clearly with: -- What each lab brings -- The specific scientific question -- A concrete first experiment (days-to-weeks scope, specific assays/methods) -- Why this collaboration beats either lab working alone -- Confidence label: [High], [Moderate], or [Speculative] +If you are posting a :memo: Summary (collaboration proposal), format it clearly by conforming to the Proposal Generation Rules If you are confirming agreement with a :memo: Summary from the other agent, start your reply with ✅. This means you accept the proposal **exactly as written** — do not add diff --git a/prompts/podcast-select.md b/prompts/podcast-select.md new file mode 100644 index 0000000..121af03 --- /dev/null +++ b/prompts/podcast-select.md @@ -0,0 +1,46 @@ +You are a literature triage assistant for a scientific researcher. Your job is to identify the single most relevant and impactful recent paper from a list of candidates, based on the researcher's profile. + +## Researcher Profile + +{profile} + +## PI Podcast Preferences + +{preferences} + +## Task + +Below is a numbered list of recent publications (title + abstract). Select the ONE paper whose findings or outputs could most plausibly accelerate or inform a specific aspect of this researcher's ongoing work. 
+ +Return your answer as JSON: +```json +{"index": <candidate number>, "justification": "<one-sentence reason>"} +``` + +If no paper clears the relevance bar, return: +```json +{"index": null, "justification": "No paper is sufficiently relevant to this researcher's current work."} +``` + +## Selection Criteria + +**INCLUDE** a paper if: +- Its findings or methods could directly accelerate a specific ongoing project, technique, or open question in the researcher's profile +- It releases a new tool, dataset, method, or reagent relevant to the researcher's techniques or targets +- It addresses a disease area, model system, or molecular target the researcher actively works on + +**EXCLUDE** a paper if: +- The connection to the researcher's work is only superficial or generic +- It is a review article, editorial, or commentary (no new primary data) +- It is purely clinical or epidemiological with no basic science relevance +- Recency alone makes it interesting — the connection must be specific and actionable + +**NOTE:** Some candidates are preprints (from bioRxiv, medRxiv, or arXiv) and are marked as such in the journal field. Preprints are valid candidates — treat them the same as peer-reviewed papers for selection purposes. + +**PREFER** papers that release a concrete output alongside findings (code, dataset, protocol, reagent, model). These tend to be immediately useful. + +**FOLLOW PI PREFERENCES:** If the PI Podcast Preferences section above contains specific instructions (e.g., topic focus, exclusions, prioritizations), apply them when selecting. PI preferences override the general criteria above. + +## Candidate Papers + +{candidates} diff --git a/prompts/podcast-summarize.md b/prompts/podcast-summarize.md new file mode 100644 index 0000000..1a96589 --- /dev/null +++ b/prompts/podcast-summarize.md @@ -0,0 +1,46 @@ +You are a science communicator writing a personalized research brief for a specific PI. Your goal is to help the PI quickly grasp whether and how a new paper is useful to their lab. 
+ +## Researcher Profile + +{profile} + +## PI Podcast Preferences + +{preferences} + +## Paper + +{paper} + +## Task + +Write a structured research brief following the exact format below. Be specific, direct, and concise — like a knowledgeable postdoc briefing their PI. No filler phrases, no generic connections. + +--- + +*Today's Research Brief — {date}* + +*{paper_title}* +{authors} · {journal} · {year} + +*What they found:* +[2–3 sentences on core findings. Include specific results, effect sizes, or key observations. Be concrete — name specific proteins, pathways, organisms, or quantitative outcomes where relevant.] + +*Key output:* +[1–2 sentences on the tool, method, dataset, code, protocol, or reagent released with the paper. ONLY include this section if the paper releases a concrete artifact. If there is no distinct output, omit this section entirely — do not write "N/A" or a placeholder.] + +*Why this matters for your lab:* +[2–3 sentences connecting the paper specifically to this PI's work. You MUST name at least one specific technique, model system, molecular target, or open question from the researcher's profile. Do not write generic connections like "this is relevant to your proteomics work" — say exactly what aspect and how.] + +*Link:* {paper_url} + +--- + +## Rules + +- Total length: approximately 200–280 words +- Tone: collegial and precise, not promotional +- The "Why this matters" section is the most important — make it specific to this researcher, not a general statement about the field +- If the PI Podcast Preferences section contains specific instructions on tone, focus, or framing, follow them +- If the abstract is all you have, base the brief on the abstract. Do not speculate about full-text content you weren't given. 
+- Do not add any text before or after the brief itself diff --git a/pyproject.toml b/pyproject.toml index d09fa83..6b780d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "boto3>=1.34.0", "typer>=0.12.0", "rich>=13.7.0", + "mutagen>=1.47.0", ] [project.optional-dependencies] diff --git a/scripts/fix_proposal_summaries.py b/scripts/fix_proposal_summaries.py new file mode 100644 index 0000000..264a1a9 --- /dev/null +++ b/scripts/fix_proposal_summaries.py @@ -0,0 +1,29 @@ +# scripts/fix_proposal_summaries.py +import asyncio, re +from sqlalchemy import select +from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession +from src.config import get_settings +from src.models import ThreadDecision + +async def fix(): + settings = get_settings() + engine = create_async_engine(settings.database_url) + factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + async with factory() as db: + result = await db.execute( + select(ThreadDecision).where(ThreadDecision.outcome == "proposal") + ) + decisions = result.scalars().all() + fixed = 0 + for d in decisions: + if not d.summary_text: + continue + match = re.search(r"(.*?)", d.summary_text, re.DOTALL) + if match: + d.summary_text = match.group(1).strip() + fixed += 1 + await db.commit() + print(f"Fixed {fixed} / {len(decisions)} proposals") + +asyncio.run(fix()) diff --git a/scripts/matchmaker_cli.py b/scripts/matchmaker_cli.py new file mode 100644 index 0000000..a8072ac --- /dev/null +++ b/scripts/matchmaker_cli.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +"""Generate a matchmaker collaboration proposal from two PI profile directories. 
+ +Usage (from repo root inside the app container): + + Single pair (positional args): + python scripts/matchmaker_cli.py <pi_a> <pi_b> [--dry-run] + + Batch from TSV file (-t flag, no positional args): + python scripts/matchmaker_cli.py -t pairs.tsv [--dry-run] + +The TSV file has two tab-separated columns (pi_a, pi_b), one pair per line. +Lines starting with '#' and blank lines are ignored. A header row whose first +cell is "pi_a" (case-insensitive) is also skipped automatically. + +Examples: + python scripts/matchmaker_cli.py su wiseman + python scripts/matchmaker_cli.py grotjahn lotz --dry-run + python scripts/matchmaker_cli.py -t pairs.tsv + python scripts/matchmaker_cli.py -t pairs.tsv --dry-run + +The PI slug must match a filename in profiles/public/ (without .md extension). +Private profiles from profiles/private/{slug}.md are included if they exist. + +Results are written to the matchmaker_proposals DB table and are immediately +visible in the admin Matchmaker tab at /admin/matchmaker. +""" + +import argparse +import asyncio +import re +import sys +from pathlib import Path + + +def load_profile_files(slug: str) -> tuple[str, str, str]: + """Load public + private profiles for a given slug. + + Returns (pi_name, public_md, private_md). + pi_name is extracted from the '**PI:**' line in the public profile. 
+ """ + slug = slug.lower() + public_path = Path("profiles/public") / f"{slug}.md" + private_path = Path("profiles/private") / f"{slug}.md" + + if not public_path.exists(): + available = sorted(p.stem for p in Path("profiles/public").glob("*.md")) + print(f"Error: no public profile found for '{slug}'.") + print(f"Available slugs: {', '.join(available)}") + sys.exit(1) + + public_md = public_path.read_text() + + # Extract PI name from "**PI:** Name" line + pi_name = slug.capitalize() + match = re.search(r"\*\*PI:\*\*\s*(.+)", public_md) + if match: + pi_name = match.group(1).strip() + + private_md = private_path.read_text() if private_path.exists() else "" + + return pi_name, public_md, private_md + + +async def run(slug_a: str, slug_b: str, dry_run: bool) -> None: + from src.config import get_settings + from src.services.llm import generate_matchmaker_proposal + + name_a, public_a, private_a = load_profile_files(slug_a) + name_b, public_b, private_b = load_profile_files(slug_b) + + settings = get_settings() + + print(f"Generating proposal: {name_a} × {name_b}") + print(f"Model: {settings.llm_agent_model_opus}") + print("Calling LLM… (this may take 10–20 seconds)") + + result = await generate_matchmaker_proposal( + name_a=name_a, + public_profile_a=public_a, + private_profile_a=private_a, + publications_a="(see public profile above)", + name_b=name_b, + public_profile_b=public_b, + private_profile_b=private_b, + publications_b="(see public profile above)", + model=settings.llm_agent_model_opus, + ) + + print(f"\nConfidence : {result['confidence'].upper()}") + print(f"Title : {result['title']}") + print(f"Tokens : {result['input_tokens']} in / {result['output_tokens']} out") + print("\n" + "─" * 72) + print(result["proposal_md"]) + print("─" * 72) + + if dry_run: + print("\n[dry-run] Skipping database write.") + return + + # Write to DB + import uuid + from datetime import datetime, timezone + + from sqlalchemy import text + + from src.database import get_engine, 
get_session_factory + from src.models.matchmaker import MatchmakerProposal + + engine = get_engine() + session_factory = get_session_factory() + + async with session_factory() as session: + proposal = MatchmakerProposal( + id=uuid.uuid4(), + pi_a_id=None, + pi_b_id=None, + pi_a_name=name_a, + pi_b_name=name_b, + proposal_md=result["proposal_md"], + title=result["title"], + confidence=result["confidence"], + llm_model=result["model"], + input_tokens=result["input_tokens"], + output_tokens=result["output_tokens"], + generated_at=datetime.now(timezone.utc), + ) + session.add(proposal) + await session.commit() + print(f"\nSaved to DB: {proposal.id}") + print(f"View at : /admin/matchmaker/{proposal.id}") + + await engine.dispose() + + +def _parse_tsv(path: str) -> list[tuple[str, str]]: + """Parse a two-column TSV file into a list of (pi_a, pi_b) slug pairs.""" + pairs: list[tuple[str, str]] = [] + with open(path) as f: + for lineno, line in enumerate(f, 1): + line = line.rstrip("\n") + if not line or line.startswith("#"): + continue + parts = line.split("\t") + if len(parts) < 2: + print(f"Warning: line {lineno} has fewer than 2 columns, skipping: {line!r}") + continue + a, b = parts[0].strip(), parts[1].strip() + if lineno == 1 and a.lower() == "pi_a": + continue # skip header row + if not a or not b: + print(f"Warning: line {lineno} has empty slug, skipping.") + continue + pairs.append((a, b)) + return pairs + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Generate matchmaker proposals from PI profile slugs.", + epilog=( + "Single pair: matchmaker_cli.py su wiseman\n" + "Batch TSV: matchmaker_cli.py -t pairs.tsv" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("pi_a", nargs="?", help="Slug for PI A (e.g. 'su')") + parser.add_argument("pi_b", nargs="?", help="Slug for PI B (e.g. 
'wiseman')") + parser.add_argument( + "-t", "--tsv", + metavar="FILE", + help="TSV file with two columns (pi_a, pi_b); one pair per line", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print proposals to stdout without writing to the database", + ) + args = parser.parse_args() + + # Build list of pairs to process + if args.tsv: + if args.pi_a or args.pi_b: + parser.error("Cannot combine -t/--tsv with positional PI arguments.") + pairs = _parse_tsv(args.tsv) + if not pairs: + print("No valid pairs found in TSV file.") + sys.exit(1) + elif args.pi_a and args.pi_b: + pairs = [(args.pi_a, args.pi_b)] + else: + parser.error("Provide either two positional slugs or -t FILE.") + + errors: list[str] = [] + for i, (slug_a, slug_b) in enumerate(pairs): + if len(pairs) > 1: + print(f"\n{'='*72}") + print(f"Pair {i + 1}/{len(pairs)}: {slug_a} × {slug_b}") + print(f"{'='*72}") + if slug_a == slug_b: + msg = f"Skipping {slug_a} × {slug_b}: PI A and PI B must be different." + print(msg) + errors.append(msg) + continue + try: + asyncio.run(run(slug_a, slug_b, args.dry_run)) + except SystemExit: + errors.append(f"Failed: {slug_a} × {slug_b}") + + if errors: + print(f"\n{len(errors)} pair(s) failed:") + for e in errors: + print(f" {e}") + + +if __name__ == "__main__": + main() diff --git a/scripts/test_podcast_su.py b/scripts/test_podcast_su.py new file mode 100644 index 0000000..600c6e6 --- /dev/null +++ b/scripts/test_podcast_su.py @@ -0,0 +1,140 @@ +"""One-shot test: run the podcast pipeline for agent 'su' only. 
+ +Outputs: + .labbot-tests/su-summary-.txt — generated text summary + .labbot-tests/su-audio-.mp3 — TTS audio (if MISTRAL_API_KEY is set) + +Usage: + DATABASE_URL=postgresql+asyncpg://copi:copi@localhost:5432/copi \ + python scripts/test_podcast_su.py +""" + +import asyncio +import logging +import os +import shutil +from datetime import date +from pathlib import Path + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", +) +logger = logging.getLogger(__name__) + +OUTPUT_DIR = Path(".labbot-tests") +AUDIO_DIR = Path("data/podcast_audio") + + +async def run(): + from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine + from sqlalchemy.orm import sessionmaker + + from src.config import get_settings + from src.podcast.pipeline import ( + _generate_summary, + _load_podcast_preferences, + _load_public_profile, + _parse_profile_markdown, + _select_article, + _try_fetch_full_text, + ) + from src.podcast.tts_utils import get_audio_duration_seconds + from src.podcast.pubmed_search import build_queries, fetch_candidates + from src.podcast.state import get_delivered_pmids, record_delivery + + settings = get_settings() + agent_id = "su" + today = date.today() + OUTPUT_DIR.mkdir(exist_ok=True) + + logger.info("=== LabBot Podcast test run for agent: %s ===", agent_id) + + # 1. Load profiles + profile_text = _load_public_profile(agent_id) + if not profile_text: + logger.error("No public profile found for agent: %s", agent_id) + return + logger.info("Loaded profile (%d chars)", len(profile_text)) + + preferences_text = await _load_podcast_preferences(agent_id) + if preferences_text: + logger.info("Loaded podcast preferences (%d chars)", len(preferences_text)) + else: + logger.info("No podcast preferences found for agent: %s", agent_id) + + # 2. 
Build queries and fetch candidates + profile_dict = _parse_profile_markdown(profile_text) + queries = build_queries(profile_dict) + logger.info("Search queries: %s", queries) + + already_delivered = get_delivered_pmids(agent_id) + logger.info("Already delivered PMIDs: %s", already_delivered) + + candidates = await fetch_candidates( + queries, + already_delivered=already_delivered, + days=settings.podcast_search_window_days, + max_total=settings.podcast_max_candidates, + ) + logger.info("Fetched %d candidates", len(candidates)) + if not candidates: + logger.error("No candidate articles found — aborting") + return + + # 3. LLM article selection + selected, justification = await _select_article(profile_text, candidates, agent_id, preferences_text) + if selected is None: + logger.error("No article selected — aborting") + return + pmid = selected.get("pmid", "") + logger.info("Selected PMID: %s", pmid) + logger.info("Justification: %s", justification) + + # 4. Fetch full text + full_text = await _try_fetch_full_text(pmid) + logger.info("Full text fetched: %s", bool(full_text)) + + # 5. Generate text summary + summary = await _generate_summary(profile_text, selected, full_text, agent_id, preferences_text) + if not summary: + logger.error("Summary generation failed — aborting") + return + + summary_path = OUTPUT_DIR / f"su-summary-{today.isoformat()}.txt" + summary_path.write_text(summary, encoding="utf-8") + logger.info("Summary written to %s", summary_path) + print("\n" + "=" * 60) + print("TEXT SUMMARY") + print("=" * 60) + print(summary) + print("=" * 60 + "\n") + + # 6. 
Generate audio — dispatch to backend configured by PODCAST_TTS_BACKEND + if settings.podcast_tts_backend == "local": + from src.podcast.local_tts import generate_audio + logger.info("TTS backend: local vLLM-Omni (%s:%s)", settings.local_tts_host, settings.local_tts_port) + else: + from src.podcast.mistral_tts import generate_audio + logger.info("TTS backend: Mistral AI (%s)", settings.mistral_tts_model) + + audio_src = AUDIO_DIR / agent_id / f"{today.isoformat()}.mp3" + audio_ok = await generate_audio(summary, agent_id, audio_src) + + if audio_ok: + audio_dest = OUTPUT_DIR / f"su-audio-{today.isoformat()}.mp3" + shutil.copy2(audio_src, audio_dest) + duration = get_audio_duration_seconds(audio_src) + logger.info("Audio saved to %s (duration: %ss)", audio_dest, duration) + else: + logger.warning("Audio generation failed (backend: %s)", settings.podcast_tts_backend) + + logger.info("=== Test run complete ===") + logger.info(" PMID: %s", pmid) + logger.info(" Summary: %s", summary_path) + if audio_ok: + logger.info(" Audio: %s", audio_dest) + + +if __name__ == "__main__": + asyncio.run(run()) diff --git a/specs/admin-dashboard.md b/specs/admin-dashboard.md index 2d9cd8f..b797531 100644 --- a/specs/admin-dashboard.md +++ b/specs/admin-dashboard.md @@ -140,7 +140,34 @@ Analytics on agent-to-agent thread conversations and outcomes. **Export:** HTML and plain text export options for proposal review. -### 7. LLM Call Logs (`/admin/llm-calls`) +### 7. Matchmaker (`/admin/matchmaker`) + +Admin tool for generating collaboration proposals between two PIs on demand, without running an agent simulation. See `labbot-matchmaker.md` for the full specification. 
+ +**Generate form (top of page):** +- Two dropdowns listing all users with a complete `ResearcherProfile`, sorted by name +- Client-side enforcement: same user cannot be selected in both dropdowns; Generate button disabled until both are selected +- On submit: POSTs to `/admin/matchmaker/generate`, shows spinner, redirects to detail view on success + +**Proposals table:** +- Confidence badge (High = green, Moderate = yellow, Speculative = gray) +- PI A / PI B names +- Proposal title (truncated) +- Generated timestamp +- View / Delete actions + +**Filters:** +- PI multi-select (matches either side) +- Confidence filter + +**Row click** → proposal detail page (`/admin/matchmaker/{id}`) + +**Proposal detail (`/admin/matchmaker/{id}`):** +- Header: PI A × PI B, confidence badge, generated timestamp, token counts +- Full proposal rendered as markdown +- Delete button + +### 8. LLM Call Logs (`/admin/llm-calls`) Debugging view for all LLM API calls. @@ -153,7 +180,7 @@ Debugging view for all LLM API calls. - Latency (ms) - System prompt and response (expandable) -### 8. Access Requests (`/admin/access-requests`) +### 9. Access Requests (`/admin/access-requests`) Pre-release access gate management. @@ -171,7 +198,7 @@ Pre-release access gate management. - Add ORCID + note form - Remove ORCID button -### 9. Waitlist (`/admin/waitlist`) +### 10. Waitlist (`/admin/waitlist`) Lead-capture signups from the public landing page. @@ -183,7 +210,7 @@ Lead-capture signups from the public landing page. No outbound email is sent automatically — the admin uses the export to reach out manually, then marks rows contacted. -### 10. User Impersonation +### 11. User Impersonation Admins can assume the identity of any user to see the app as they see it. @@ -215,6 +242,10 @@ Admins can assume the identity of any user to see the app as they see it. 
| `POST /admin/agents/{id}/approve` | Approve pending agent | | `GET /admin/discussions` | Thread discussions and outcomes | | `GET /admin/discussions/export` | Export discussions (HTML/text) | +| `GET /admin/matchmaker` | Matchmaker tab with generate form and proposals table | +| `POST /admin/matchmaker/generate` | Run LLM pipeline and store result | +| `GET /admin/matchmaker/{id}` | Proposal detail view | +| `POST /admin/matchmaker/{id}/delete` | Delete a proposal | | `GET /admin/access-requests` | Pending access requests + allowlist management | | `POST /admin/access-requests/{user_id}/approve` | Approve a pending user | | `POST /admin/access-requests/{user_id}/deny` | Deny a pending user | diff --git a/specs/data-model.md b/specs/data-model.md index bc3adc9..cad5bc8 100644 --- a/specs/data-model.md +++ b/specs/data-model.md @@ -93,6 +93,25 @@ PostgreSQL-backed async job queue. | started_at | timestamp | Nullable | | completed_at | timestamp | Nullable | +### MatchmakerProposal + +Admin-generated collaboration proposals produced by a single LLM call from two PIs' public and private profiles, without running an agent simulation. See `labbot-matchmaker.md`. + +| Field | Type | Notes | +|---|---|---| +| id | uuid | Primary key | +| pi_a_id | FK → User | First PI (CASCADE delete) | +| pi_b_id | FK → User | Second PI (CASCADE delete) | +| proposal_md | text | Full proposal in markdown | +| title | string(500) | Extracted from first `# heading` in proposal_md | +| confidence | string(20) | `high` / `moderate` / `speculative` | +| llm_model | string(100) | Model used (e.g. `claude-opus-4-7`) | +| input_tokens | integer | Nullable. Input token count | +| output_tokens | integer | Nullable. Output token count | +| generated_at | timestamp | Server default now() | + +**Indexes:** `pi_a_id`, `pi_b_id` + ### AccessAllowlist Admin-managed list of pre-approved ORCID IDs. 
ORCIDs on this list bypass the pre-release access gate and land directly in `allowed` state on first login. diff --git a/specs/labbot-matchmaker.md b/specs/labbot-matchmaker.md new file mode 100644 index 0000000..f8a1e9a --- /dev/null +++ b/specs/labbot-matchmaker.md @@ -0,0 +1,237 @@ +# LabBot Matchmaker Specification + +## Overview + +The Matchmaker is an alternative pathway for generating collaboration proposals between two PIs without running the multi-agent simulation. An admin selects two PIs from dropdowns; a single LLM call reads both their public and private profiles and produces a proposal of identical quality and format to those generated by the agent dialogue system. The output is stored and displayed in a new **Matchmaker** tab in the admin dashboard. + +This is complementary to the agent system — not a replacement. It is useful for: +- Quickly generating proposals on demand before a simulation run +- Testing profile quality in isolation +- Generating proposals for PIs who don't yet have active agents + +--- + +## Admin UI (`/admin/matchmaker`) + +### Layout + +A new tab in the admin nav alongside "Discussions". + +**Top section — Generate form:** + +``` +[ PI A dropdown ▾ ] [ PI B dropdown ▾ ] [ Generate Proposal ] +``` + +Both dropdowns list all users who have a complete `ResearcherProfile`. Sorted alphabetically by name. The same user cannot be selected in both dropdowns (client-side enforcement — disable the selected user in the other dropdown). + +The **Generate Proposal** button is disabled until both PIs are selected. On click it POSTs to `/admin/matchmaker/generate` and shows an inline spinner while the LLM call runs (typically 5–15 seconds). + +**Main section — Proposals table:** + +Same visual style as `/admin/discussions`. 
+ +| Column | Notes | +|---|---| +| Date/time | When generated | +| PI A | Name | +| PI B | Name | +| Confidence | `High` / `Moderate` / `Speculative` badge (color-coded: green/yellow/gray) | +| Title | First line of the proposal | +| Actions | View · Delete | + +**Filters:** +- PI filter (multi-select, filters rows where either PI matches) +- Confidence filter + +**Export:** Each proposal row has an HTML and plain-text export link, same as Discussions. + +### Proposal Detail View (`/admin/matchmaker/{id}`) + +Full-page view of a single proposal. Sections mirror the proposal format (see Output Format below). Includes: +- Header: PI A, PI B, confidence label, generated timestamp +- Full proposal body rendered as markdown +- "Back to Matchmaker" link + +--- + +## Backend + +### New Route: `POST /admin/matchmaker/generate` + +**Request body (form):** `pi_a_id`, `pi_b_id` (user UUIDs) + +**Steps:** +1. Load `ResearcherProfile` for both users (including `private_profile_md`). +2. Load recent publications for both users (up to 20, sorted by year desc). +3. Call `generate_matchmaker_proposal(profile_a, profile_b, pubs_a, pubs_b)` — see LLM Pipeline below. +4. Parse LLM output, extract confidence label. +5. Insert a `MatchmakerProposal` row. +6. Redirect to `/admin/matchmaker` (or return JSON for HTMX — see implementation note below). + +**Error handling:** If either user lacks a complete profile, return a 400 with an inline error message above the form: "PI A / PI B does not have a complete profile yet." + +### New Route: `GET /admin/matchmaker` + +Renders the tab with the generate form and proposals table. + +### New Route: `GET /admin/matchmaker/{id}` + +Renders the proposal detail view. + +### New Route: `DELETE /admin/matchmaker/{id}` + +Deletes the proposal row. Redirects back to `/admin/matchmaker`. 
+ +--- + +## Data Model + +### MatchmakerProposal + +| Field | Type | Notes | +|---|---|---| +| id | uuid | Primary key | +| pi_a_id | FK → User | | +| pi_b_id | FK → User | | +| proposal_md | text | Full proposal in markdown | +| title | text | Extracted first heading from proposal_md | +| confidence | string(20) | `high` / `moderate` / `speculative` | +| llm_model | string | Model used (e.g. `claude-opus-4-7`) | +| input_tokens | integer | | +| output_tokens | integer | | +| generated_at | timestamp | | + +`pi_a_id` and `pi_b_id` are stored in canonical order (lower UUID first) to avoid duplicate detection confusion. The UI always shows them in name-alphabetical order regardless. + +--- + +## LLM Pipeline + +### Model + +`claude-opus-4-7` — same model used for Phase 4 agent replies. + +### Prompt (`prompts/matchmaker.md`) + +``` +You are evaluating a potential research collaboration between two PIs. + +Your task is to produce a high-quality collaboration proposal that meets the +Collaboration Quality Standards below. + +--- +## Collaboration Quality Standards + +[Verbatim content from agent-system.md § "Collaboration Quality Standards"] + +--- +## PI A: {name_a} + +### Public Profile +{public_profile_a} + +### Private Instructions (confidential — do not quote directly) +{private_profile_a} + +### Recent Publications +{publications_a} + +--- +## PI B: {name_b} + +### Public Profile +{public_profile_b} + +### Private Instructions (confidential — do not quote directly) +{private_profile_b} + +### Recent Publications +{publications_b} + +--- +## Instructions + +Produce a collaboration proposal using the output format below. Apply the +Collaboration Quality Standards strictly. If you cannot identify a High or +Moderate confidence collaboration, produce a Speculative one and label it as such. + +Respect each PI's private instructions when framing the proposal: if a PI has +expressed preferences for specific topics or partners, weight those angles +positively. 
Do not quote or reveal any private instruction text in the output. +``` + +### Output Format + +The LLM output must follow this structure (identical to proposals generated by the agent dialogue system): + +```markdown +# [Collaboration Title] + +**Confidence:** High | Moderate | Speculative + +## Scientific Rationale +[2–3 paragraphs. Why these two labs? What does each bring that the other lacks? +Name specific techniques, datasets, reagents, or model systems.] + +## True Complementarity +- **PI A contributes:** [specific capabilities — not generic] +- **PI B contributes:** [specific capabilities — not generic] +- **Gap filled:** [what neither could do alone] + +## Concrete First Experiment +[1 paragraph. Scoped to days-to-weeks. Names specific assays, methods, +reagents, or datasets. Both labs are essential to execute it.] + +## Benefits to Each Lab +- **PI A benefits:** [specific, non-generic] +- **PI B benefits:** [specific, non-generic] + +## Open Questions / Next Steps +[Bullet list of what would need to be confirmed before committing effort] +``` + +The output format is enforced by wrapping the final proposal in `` tags (same pattern as `` in Phase 4). The parser extracts content inside those tags. The confidence label is parsed from the `**Confidence:**` line. + +--- + +## Admin Dashboard Integration + +### Nav update (`admin-dashboard.md` §API Routes additions) + +| Route | Purpose | +|---|---| +| `GET /admin/matchmaker` | Matchmaker tab with generate form and proposals table | +| `POST /admin/matchmaker/generate` | Run LLM pipeline and store result | +| `GET /admin/matchmaker/{id}` | Proposal detail view | +| `DELETE /admin/matchmaker/{id}` | Delete a proposal | + +### Nav link + +Add **Matchmaker** between **Discussions** and **LLM Call Logs** in the admin sidebar. Show a count badge of total proposals (no filter). 
+ +--- + +## Relationship to Agent System + +| Dimension | Agent System | Matchmaker | +|---|---|---| +| Input | Multi-turn Slack dialogue | Public + private profiles only | +| Latency | Minutes to hours (simulation run) | ~10 seconds (single LLM call) | +| Proposal quality standard | Collaboration Quality Standards | Identical | +| Output format | Identical | Identical | +| Private profile respected | Yes (agent's own) | Yes (both, read-only, not quoted) | +| PI notification | Slack DM | None (admin-only tool) | +| Stored in | `Proposal` table | `MatchmakerProposal` table | + +Proposals from both sources can be exported in the same HTML/plain-text format. + +--- + +## Design Principles + +- **Admin-only.** The Matchmaker tab is only accessible to `is_admin = true` users. No self-service endpoint. +- **Read-only for PIs.** PIs are not notified. Admins use this to explore matches and manually share results. +- **No simulation dependency.** Works without any running agent containers or Slack tokens. +- **Same quality bar.** The Collaboration Quality Standards section from `agent-system.md` is embedded verbatim in the matchmaker prompt. No relaxed criteria. +- **Prompt caching.** The system prompt (quality standards + both profiles) is structured to maximize Anthropic prompt cache hits when the same pair is regenerated. diff --git a/specs/labbot-podcast.md b/specs/labbot-podcast.md new file mode 100644 index 0000000..6ad3bc1 --- /dev/null +++ b/specs/labbot-podcast.md @@ -0,0 +1,616 @@ +# LabBot Podcast Specification + +## Overview + +LabBot Podcast is a daily personalized research briefing service for researchers. It surfaces the single most relevant and impactful recent publication from the scientific literature based on the researcher's profile, generates a structured text summary highlighting findings and tools useful to their ongoing work, and produces a short audio episode via Mistral AI TTS. 
Researchers can subscribe to a personal RSS podcast feed to listen to the audio. + +The system runs once per day and requires no researcher interaction to be useful — but researchers can tune it through a web UI. There are two delivery paths: + +- **Agent path** — pilot-lab PIs with an approved `AgentRegistry` entry additionally receive the text summary as a Slack DM from their lab bot. +- **User path** — any researcher who has completed ORCID onboarding and has a `ResearcherProfile` with a research summary receives the podcast automatically. No Slack bot, agent approval, or admin action required. + +--- + +## Architecture + +### Service Placement + +LabBot Podcast runs as a separate Docker container (`podcast` service), mirroring the GrantBot pattern: +- Long-running scheduler process +- Executes once per calendar day at 9am UTC (1 hour after GrantBot) +- If the container was down at the scheduled time, runs immediately on startup (catch-up) +- State persisted in `data/podcast_state.json` (tracks which articles have been delivered per agent) + +### Delivery Paths + +| Path | Who | Profile source | Delivery | Audio/RSS key | +|---|---|---|---|---| +| **Agent** | Pilot-lab PIs with active `AgentRegistry` | `profiles/public/{agent_id}.md` (disk) | Slack DM + RSS | `agent_id` string | +| **User** | Any ORCID user with completed `ResearcherProfile` | `ResearcherProfile` DB row (structured fields) | RSS only | `user_id` UUID | + +Both paths run in the same daily scheduler pass. A user who has both a `ResearcherProfile` and an active agent is handled only by the agent path (no duplicate episode). 
+ +### Dependencies on Existing Systems + +| Existing component | How Podcast uses it | +|---|---| +| `ResearcherProfile` DB model | Source of research areas, keywords, techniques, disease areas for the user path | +| `profiles/public/{lab}.md` | Profile text for the agent path (LLM article selection and summary) | +| `src/services/pubmed.py` | Literature search (keyword + MeSH queries) | +| `src/services/llm.py` | Article selection ranking and summary generation (all calls logged to `LlmCallLog`) | +| `AgentRegistry` | Maps agent → PI → Slack bot token for DM delivery (agent path only) | +| `User.id` (UUID) | Stable, opaque RSS feed token for the user path | +| Slack bot DM | Text summary delivery (agent path only) | + +### New External Dependency + +**Mistral AI API** — text-to-speech generation. +- Configured via `MISTRAL_API_KEY` environment variable +- Voice selection per agent configured in `data/podcast_voices.json` (agent_id → voice_id); falls back to a default voice if not set +- Audio files stored at `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` + +--- + +## Daily Pipeline + +Each day the scheduler runs two loops in sequence: + +1. **Agent loop** — iterates over all active `AgentRegistry` entries and calls `run_pipeline_for_agent()` for each. +2. **User loop** — iterates over all `User` rows where `onboarding_complete=True` and `profile.research_summary IS NOT NULL`, skipping any whose `user_id` appeared in the agent loop, and calls `run_podcast_for_user()` for each. + +For each recipient, the pipeline executes the following steps sequentially: + +### Step 1: Load Profile + +- **Agent path**: read `profiles/public/{agent_id}.md` from disk. If absent, skip. +- **User path**: construct profile text from structured `ResearcherProfile` DB fields (`research_summary`, `disease_areas`, `techniques`, `experimental_models`, `keywords`). If `research_summary` is empty, skip. 
+ +### Step 2: Build Search Queries + +Construct PubMed search terms from the profile: +- Extract top research area keywords +- Extract technique and experimental model terms +- Combine into 2–3 PubMed query strings (e.g., `(proteostasis OR unfolded protein response) AND (neurodegeneration OR proteomics)`) +- Inject any `extra_keywords` from `PodcastPreferences` as additional quoted terms +- Limit to publications from the last 14 days (rolling window ensures coverage across weekend/holiday gaps) +- Cap at 50 candidate abstracts + +### Step 3: Fetch Candidate Abstracts + +Use `src/services/pubmed.py` to execute each query and retrieve PMIDs + abstracts. Deduplicate across queries. Skip any PMID already in `podcast_state.json` for this recipient (agent or user) to prevent re-delivering the same article. + +### Step 4: LLM Article Selection (Sonnet) + +Single LLM call (Sonnet) with: +- The researcher's full profile text (disk for agent path; constructed from DB for user path) +- The list of candidate abstracts (title + abstract text, numbered) +- Any journal preferences from `PodcastPreferences` +- Prompt: `prompts/podcast-select.md` + +The LLM returns the index of the single best article, along with a one-sentence justification of why it is relevant to this researcher's ongoing work. If no article meets a minimum relevance threshold, it returns `null` and the pipeline skips delivery today. + +### Step 5: Generate Text Summary (Opus) + +One LLM call (Opus) with: +- The researcher's full profile text +- The selected article's title, abstract, and full text (fetched via `retrieve_full_text` if available in PMC, otherwise abstract only) +- Prompt: `prompts/podcast-summarize.md` + +Output is a structured text summary (see format below). This is used as the TTS input and stored in `PodcastEpisode.text_summary`. 
+ +### Step 6: Generate Audio (configured TTS backend) + +Pass the text summary to the TTS backend selected by `PODCAST_TTS_BACKEND` (`mistral` by default; `openai` or `local` vLLM-Omni as alternatives). For the Mistral backend: +- Voice: from `PodcastPreferences.voice_id`, or `MISTRAL_TTS_DEFAULT_VOICE` +- Model: configurable via `MISTRAL_TTS_MODEL` +- Output: MP3 file saved to: + - Agent path: `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` + - User path: `data/podcast_audio/users/{user_id}/{YYYY-MM-DD}.mp3` +- If TTS fails, the episode DB row is **not** written (see commit-last ordering); the run returns `False`. + +### Step 7: Deliver via Slack DM _(agent path only)_ + +Send the text summary as a DM from the agent's Slack bot to its PI, appending the RSS feed URL. User-path episodes are delivered via RSS only — no Slack bot is required. + +### Step 8: Persist Episode and Update State + +1. Write the `PodcastEpisode` row to the DB: + - Agent path: `agent_id` set, `user_id` NULL + - User path: `user_id` set, `agent_id` NULL +2. Append the delivered PMID to `data/podcast_state.json` (keyed by `agent_id` or `user_id`) to prevent re-delivery. + +--- + +## Text Summary Format + +The Opus-generated summary follows a consistent structure. The prompt enforces this layout: + +``` +*Today's Research Brief — {Date}* + +*{Paper Title}* +{Authors} · {Journal} · {Year} + +*What they found:* +2–3 sentences on the core findings — specific results, effect sizes, or observations. + +*Key output:* +1–2 sentences on any tool, method, dataset, or reagent released with the paper (if applicable). Omit this section if the paper has no distinct output. + +*Why this matters for your lab:* +2–3 sentences connecting the paper's findings and outputs specifically to the PI's ongoing research areas, techniques, or open questions. Ground this in the PI's profile — name specific techniques, model systems, or questions from their work.
+ +*PubMed:* https://pubmed.ncbi.nlm.nih.gov/{PMID}/ +``` + +The Slack DM appends a line at the bottom: +> _Listen to the audio version: {rss_feed_url}_ + +--- + +## RSS Podcast Feed + +### Endpoints + +| Path | Auth | Key | +|---|---|---| +| `GET /podcast/{agent_id}/feed.xml` | None | Pilot-lab agent | +| `GET /podcast/{agent_id}/audio/{date}.mp3` | None | Pilot-lab agent | +| `GET /podcast/users/{user_id}/feed.xml` | None | Plain ORCID user | +| `GET /podcast/users/{user_id}/audio/{date}.mp3` | None | Plain ORCID user | + +All four endpoints are public and unauthenticated. The `user_id` UUID is opaque and acts as a stable, subscribable feed token — equivalent to a private podcast URL. Users retrieve their feed URL from the `/podcast/settings` page. + +### Feed Structure + +Standard RSS 2.0 with iTunes podcast extensions (identical structure for both paths): + +```xml + + + {Name} — LabBot Research Briefings + Daily personalized research summaries for {Name}. + {feed_url} + {Name} + + + {Paper Title} — {Date} + {text summary} + + {RFC 822 date} + {agent_id|user-{user_id}}-{YYYY-MM-DD} + {duration} + + ... + + +``` + +### Audio File Storage + +| Path | Audio directory | +|---|---| +| Agent path | `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` | +| User path | `data/podcast_audio/users/{user_id}/{YYYY-MM-DD}.mp3` | + +Files are streamed with `Content-Type: audio/mpeg`. + +--- + +## LLM Prompt Files + +Two new prompt files in `prompts/`: + +### `prompts/podcast-select.md` + +Instructs the LLM to act as a literature triage assistant for a specific PI. 
It receives: +- The PI's public profile (research areas, techniques, open questions, unique capabilities) +- Numbered list of candidate abstracts (title + abstract) + +It must return: +- The number of the most relevant article, or `null` if none clears the relevance bar +- A one-sentence justification referencing a specific aspect of the PI's profile + +Key instructions in the prompt: +- Relevance is defined as: the paper's findings or outputs could plausibly accelerate or inform a specific aspect of the PI's ongoing work +- Recency alone is not sufficient — the connection must be specific +- Prefer papers that release a tool, method, dataset, or reagent alongside findings +- Do not pick review articles or editorials + +### `prompts/podcast-summarize.md` + +Instructs the LLM to act as a science communicator writing for a specific PI. It receives: +- The PI's public profile +- Full paper text (or abstract if full text unavailable) + +It must produce the structured summary described above. Key instructions: +- The "Why this matters for your lab" section must name specific techniques, model systems, or open questions from the PI's profile — no generic connections +- Tone is like a knowledgeable postdoc briefing their PI: specific, direct, no filler +- The "Key output" section is only included if the paper releases a concrete artifact (tool, code, dataset, method, reagent); skip it otherwise +- Target length: ~250 words total + +--- + +## Data Model + +### `PodcastEpisode` + +Rows are keyed by either `agent_id` (string) or `user_id` (UUID FK to `users.id`). Exactly one should be set per row. 
+ +```python +class PodcastEpisode(Base): + __tablename__ = "podcast_episodes" + + id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=uuid.uuid4) + agent_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True) + user_id: Mapped[uuid.UUID | None] = mapped_column(UUID, ForeignKey("users.id"), nullable=True, index=True) + episode_date: Mapped[date] = mapped_column(Date, nullable=False) + pmid: Mapped[str] = mapped_column(String(100), nullable=False) + paper_title: Mapped[str] = mapped_column(String(500), nullable=False) + paper_authors: Mapped[str] = mapped_column(String(500), nullable=False) + paper_journal: Mapped[str] = mapped_column(String(255), nullable=False) + paper_year: Mapped[int] = mapped_column(Integer, nullable=False) + paper_url: Mapped[str | None] = mapped_column(String(1000), nullable=True) + text_summary: Mapped[str] = mapped_column(Text, nullable=False) + audio_file_path: Mapped[str | None] = mapped_column(String(500), nullable=True) + audio_duration_seconds: Mapped[int | None] = mapped_column(Integer, nullable=True) + slack_delivered: Mapped[bool] = mapped_column(Boolean, default=False) + selection_justification: Mapped[str] = mapped_column(Text, nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + __table_args__ = ( + # Agent-path: one episode per agent per day + UniqueConstraint("agent_id", "episode_date", name="uq_podcast_agent_date"), + # User-path: enforced by partial unique index (migration 0013): + # CREATE UNIQUE INDEX ix_podcast_episodes_user_date + # ON podcast_episodes (user_id, episode_date) WHERE user_id IS NOT NULL + ) +``` + +### `PodcastPreferences` + +Rows are keyed by either `agent_id` or `user_id`. Both columns are nullable and uniquely indexed. 
+ +```python +class PodcastPreferences(Base): + __tablename__ = "podcast_preferences" + + id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=uuid.uuid4) + agent_id: Mapped[str | None] = mapped_column(String(50), nullable=True, unique=True, index=True) + user_id: Mapped[uuid.UUID | None] = mapped_column(UUID, ForeignKey("users.id"), nullable=True, unique=True, index=True) + voice_id: Mapped[str | None] = mapped_column(String(100), nullable=True) + extra_keywords: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + preferred_journals: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + deprioritized_journals: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) +``` + +### State File (`data/podcast_state.json`) + +Keyed separately for agents and users: + +```json +{ + "agents": { + "": { "delivered_pmids": ["12345", "67890"] } + }, + "users": { + "": { "delivered_pmids": ["11111"] } + }, + "last_run_date": "2026-04-14" +} +``` + +The state file is a lightweight deduplication cache. The DB is the authoritative record for RSS generation and admin visibility. 
+ +### Alembic Migrations + +| Migration | Creates / alters | +|---|---| +| `0010_add_podcast_episodes.py` | `podcast_episodes` table (agent path) | +| `0011_add_podcast_paper_url.py` | `paper_url` column | +| `0012_add_podcast_preferences.py` | `podcast_preferences` table (agent path) | +| `0013_podcast_user_support.py` | `user_id` FK on both tables; make `agent_id` nullable; partial unique index for user-path episodes | + +--- + +## Configuration + +New environment variables: + +| Variable | Required | Description | +|---|---|---| +| `PODCAST_TTS_BACKEND` | No | TTS backend: `mistral` (default), `openai`, or `local` (vLLM-Omni server) | +| `MISTRAL_API_KEY` | Yes (for audio) | Mistral AI API key | +| `MISTRAL_TTS_MODEL` | No | TTS model ID (default: `voxtral-mini-tts-latest`) | +| `MISTRAL_TTS_DEFAULT_VOICE` | No | Default voice when no per-agent override exists | +| `PODCAST_BASE_URL` | Yes | Public base URL for RSS enclosure links (e.g., `https://copi.science`) | +| `PODCAST_SEARCH_WINDOW_DAYS` | No | Rolling search window in days (default: `14`) | +| `PODCAST_MAX_CANDIDATES` | No | Max PubMed abstracts per agent per day (default: `50`) | + +Per-agent voice overrides (Phase 2/3): `data/podcast_voices.json` +```json +{ + "su": "alex", + "wiseman": "stella" +} +``` +**Deprecated in Phase 4** — voice preferences move to the `podcast_preferences` DB table. The JSON file is still read as a fallback while the migration is in progress. + +--- + +## Docker Service + +Add `podcast` service to `docker-compose.yml` and `docker-compose.prod.yml`: + +```yaml +podcast: + build: .
+ command: python -m src.podcast.main + env_file: .env + volumes: + - ./data:/app/data + depends_on: + - postgres + profiles: + - podcast +``` + +Run with: `docker compose --profile podcast up -d podcast` + +--- + +## Module Structure + +``` +src/podcast/ +├── main.py # Scheduler entry point (APScheduler, same pattern as grantbot.py) +├── pipeline.py # Per-agent pipeline (steps 1–8 above) +├── pubmed_search.py # Query builder from ResearcherProfile +├── mistral_tts.py # Mistral AI TTS client wrapper +├── rss.py # RSS feed builder (reads from DB) +└── state.py # podcast_state.json read/write helpers + +src/routers/podcast.py # FastAPI routes: /podcast/{agent_id}/feed.xml, /podcast/{agent_id}/audio/{date}.mp3 +``` + +The scheduler in `src/podcast/main.py` follows the same catch-up-on-startup pattern as `src/agent/grantbot.py`: +1. On startup, check `data/podcast_state.json` for last run timestamp +2. If last run was before today's 9am UTC, run immediately +3. Schedule next run at 9am UTC + +--- + +## Admin Dashboard Integration + +Add a **Podcast** tab to the existing admin dashboard (`src/routers/admin.py` + `templates/admin.html`) showing: +- Table of recent episodes: agent, date, paper title, PMID, Slack delivered (yes/no), audio generated (yes/no) +- Link to each agent's RSS feed +- LLM call counts and token usage for the podcast pipeline (pulled from `LlmCallLog` filtered by `source = "podcast"`) + +The LLM calls from the podcast pipeline should set a `source` tag in `LlmCallLog` (add a `source` column via migration if not already present, or use the existing `extra_metadata` JSONB field). + +--- + +## PI Customization + +### Via Standing Instructions (Current) + +PIs can adjust podcast behavior through standing instructions to their lab bot (same DM mechanism as the agent system — see `pi-interaction.md`). The podcast pipeline reads the private profile when building the selection prompt. 
+ +Examples of effective standing instructions: +- "For my daily podcast, focus only on papers that release a new tool or dataset — I don't need summaries of pure wet-lab findings" +- "Prioritize papers from computational biology journals for the podcast" +- "Skip anything about C. elegans — we're not pursuing that direction anymore" + +The bot's private profile rewrite (via `prompts/pi-profile-rewrite.md`) should include a `## Podcast Preferences` section that the podcast pipeline reads when constructing the selection and summarization prompts. + +### Via Preferences UI (Phase 4) + +A structured preferences page at `/agent/{agent_id}/podcast-settings` replaces the `data/podcast_voices.json` file and augments the standing-instructions mechanism with three explicit controls: + +1. **Voice** — select the TTS voice used for audio generation +2. **Extra search keywords** — additional terms appended to PubMed/preprint queries beyond the auto-extracted profile keywords +3. **Source preferences** — journals or preprint servers to prioritize (boosted in the selection prompt) or deprioritize + +See the **Podcast Preferences UI** section below for the full design. 
+
+---
+
+## Podcast Preferences UI
+
+### Route and Access Control
+
+| Route | Method | Handler | Access | Notes |
+|---|---|---|---|---|
+| `/agent/{agent_id}/podcast-settings` | `GET` | Render agent preferences form | Agent owner or admin | Agent path |
+| `/agent/{agent_id}/podcast-settings` | `POST` | Save agent preferences | Agent owner or admin | Agent path |
+| `/podcast/settings` | `GET` | Render user preferences form | Any authenticated user with completed profile | User path |
+| `/podcast/settings` | `POST` | Save user preferences | Any authenticated user with completed profile | User path |
+| `/podcast/user/generate` | `POST` | Trigger on-demand episode | Any authenticated user with completed profile | User path |
+
+The agent-path routes remain in `src/routers/agent_page.py` with the same `get_agent_with_access()` ownership check. The user-path routes live in `src/routers/podcast.py` and use `get_current_user()` + a profile-completeness check (`onboarding_complete=True` and `profile.research_summary IS NOT NULL`).
+
+### User Feed URL
+
+After saving preferences or visiting `/podcast/settings`, the user sees their personal feed URL:
+
+```
+{PODCAST_BASE_URL}/podcast/users/{user.id}/feed.xml
+```
+
+This URL:
+- Requires no authentication to read (subscribe in any podcast app)
+- Is stable for the lifetime of the user account
+- Acts as an opaque token — not guessable, not secret, but not publicly listed
+- Is displayed with a one-click copy button on the settings page
+
+### Form Fields
+
+#### 1. Voice Selection
+
+A `<select>` dropdown listing the available TTS voices, with the user's saved voice pre-selected.

+ {% if prefs and prefs.extra_keywords %}{{ prefs.extra_keywords | length }} keyword(s) saved.{% else %}No extra keywords set.{% endif %} +

+ + + +
+

Source Preferences

+

+ Guide the article selection by telling the AI which journals or preprint servers + to prioritize or avoid. One source per line (or comma-separated). +

+ +
+ + +
+ +
+ + +
+
+ +
+ + + Cancel + +
+

+ Changes take effect on the next scheduled podcast run (daily at 9am UTC). +

+ + +{% endblock %} diff --git a/templates/base.html b/templates/base.html index 74db818..4d15b05 100644 --- a/templates/base.html +++ b/templates/base.html @@ -85,7 +85,9 @@ Jobs Activity Discussions + Matchmaker Agents + Podcast Access Waitlist diff --git a/templates/podcast_settings.html b/templates/podcast_settings.html new file mode 100644 index 0000000..59f8a1b --- /dev/null +++ b/templates/podcast_settings.html @@ -0,0 +1,155 @@ +{% extends "base.html" %} +{% block title %}Podcast Settings — CoPI{% endblock %} + +{% block content %} +
+
+
+ ← My Profile +

Podcast Settings

+

Customize your daily LabBot research briefing

+
+
+ + {% if saved %} +
+ Preferences saved successfully. +
+ {% endif %} + + +
+

Your Podcast Feed URL

+

+ Subscribe to this URL in any podcast app (Apple Podcasts, Overcast, Pocket Casts, etc.) + to receive audio episodes automatically. The URL is stable and does not require login. +

+
+ + +
+

+        New episodes are generated daily at 9am UTC. You can also
+        <button type="button" id="generate-now">generate one now</button>.

+ +
+ +
+ + +
+

Voice

+

+ Select the text-to-speech voice used for your audio episodes. + Voices are from Mistral AI's voxtral-mini-tts-latest model. +

+ +
+ + +
+

Extra Search Keywords

+

+ Additional terms to include in the daily literature search, beyond what is + auto-extracted from your profile. One keyword or phrase per line (max 20). + These are added as quoted PubMed search terms. +

+ +

+ {% if prefs and prefs.extra_keywords %}{{ prefs.extra_keywords | length }} keyword(s) saved.{% else %}No extra keywords set.{% endif %} +

+
+ + +
+

Source Preferences

+

+ Guide the article selection by telling the AI which journals or preprint servers + to prioritize or avoid. One source per line (or comma-separated). +

+ +
+ + +
+ +
+ + +
+
+ +
+ + + Cancel + +
+

+ Changes take effect on the next scheduled podcast run (daily at 9am UTC). +

+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/testpairs.tsv b/testpairs.tsv new file mode 100644 index 0000000..090f63c --- /dev/null +++ b/testpairs.tsv @@ -0,0 +1,23 @@ +Briney Ward +Briney Wiseman +Cravatt Lairson +Cravatt Lotz +Cravatt Petrascheck +Cravatt Wiseman +Deniz Grotjahn +Deniz Ken +Forli Ken +Forli Petrascheck +Forli Saez +Grotjahn Racki +Grotjahn Ward +Grotjahn Wiseman +Lairson Petrascheck +Lairson Su +Lairson Wiseman +Lotz Su +Lotz Wu +Petrascheck Su +Petrascheck Wiseman +Saez Wiseman +Su Wiseman \ No newline at end of file diff --git a/tests/test_podcast.py b/tests/test_podcast.py new file mode 100644 index 0000000..76e6138 --- /dev/null +++ b/tests/test_podcast.py @@ -0,0 +1,343 @@ +"""Unit tests for podcast pipeline pure-logic functions and RSS builder.""" + +import json +import os +import tempfile +from datetime import date +from pathlib import Path +from types import SimpleNamespace + +import pytest + +from src.podcast.pubmed_search import build_queries +from src.podcast.pipeline import ( + _format_candidates_for_prompt, + _extract_section_text, + _build_profile_text_from_db, +) +from src.podcast.rss import build_feed +from src.podcast.state import ( + get_delivered_pmids, + record_delivery, + get_delivered_pmids_for_user, + record_delivery_for_user, +) + + +# --------------------------------------------------------------------------- +# build_queries +# --------------------------------------------------------------------------- + +class TestBuildQueries: + def test_disease_areas_produce_query(self): + profile = {"disease_areas": ["neurodegeneration", "Alzheimer's disease"], "techniques": [], "experimental_models": [], "keywords": []} + queries = build_queries(profile) + assert len(queries) >= 1 + assert "neurodegeneration" in queries[0] + + def test_techniques_produce_second_query(self): + profile = { + "disease_areas": ["cancer"], + "techniques": ["CRISPR", "flow cytometry"], + "experimental_models": [], + "keywords": 
[], + } + queries = build_queries(profile) + assert len(queries) >= 2 + assert any("CRISPR" in q for q in queries) + + def test_keywords_produce_third_query(self): + profile = { + "disease_areas": ["diabetes"], + "techniques": ["proteomics"], + "experimental_models": [], + "keywords": ["insulin signaling", "beta cell"], + } + queries = build_queries(profile) + assert len(queries) >= 3 + assert any("insulin signaling" in q or "beta cell" in q for q in queries) + + def test_empty_profile_returns_empty(self): + queries = build_queries({}) + assert queries == [] + + def test_fallback_to_research_summary(self): + profile = {"research_summary": "Studying ribosome biogenesis mechanisms"} + queries = build_queries(profile) + assert len(queries) == 1 + + def test_queries_are_quoted_terms(self): + profile = {"disease_areas": ["proteostasis"], "techniques": [], "experimental_models": [], "keywords": []} + queries = build_queries(profile) + assert '"proteostasis"' in queries[0] + + +# --------------------------------------------------------------------------- +# _format_candidates_for_prompt +# --------------------------------------------------------------------------- + +class TestFormatCandidates: + def test_numbers_candidates_from_one(self): + records = [ + {"title": "Paper A", "abstract": "Abstract A", "journal": "Nature", "year": 2024}, + {"title": "Paper B", "abstract": "Abstract B", "journal": "Science", "year": 2024}, + ] + text = _format_candidates_for_prompt(records) + assert text.startswith("1.") + assert "2." in text + + def test_includes_title_and_abstract(self): + records = [{"title": "CRISPR therapy", "abstract": "We developed a new approach.", "journal": "Cell", "year": 2025}] + text = _format_candidates_for_prompt(records) + assert "CRISPR therapy" in text + assert "We developed a new approach." 
in text + + def test_truncates_long_abstract(self): + long_abstract = "x" * 1000 + records = [{"title": "T", "abstract": long_abstract, "journal": "J", "year": 2024}] + text = _format_candidates_for_prompt(records) + assert len(text) < 1000 # abstract truncated to 600 chars + + def test_handles_missing_fields(self): + records = [{"title": "Minimal record"}] + text = _format_candidates_for_prompt(records) + assert "Minimal record" in text + assert "No abstract" in text + + +# --------------------------------------------------------------------------- +# _extract_section_text +# --------------------------------------------------------------------------- + +class TestExtractSectionText: + SAMPLE_MD = """## Research Summary +We study protein folding in neurons. + +## Key Methods and Technologies +- Cryo-EM +- Mass spectrometry + +## Podcast Preferences +Focus on computational tools only. +""" + + def test_extracts_research_summary(self): + text = _extract_section_text(self.SAMPLE_MD, "Research Summary") + assert "protein folding" in text + + def test_extracts_podcast_preferences(self): + text = _extract_section_text(self.SAMPLE_MD, "Podcast Preferences") + assert "computational tools" in text + + def test_stops_at_next_section(self): + text = _extract_section_text(self.SAMPLE_MD, "Research Summary") + assert "Cryo-EM" not in text + + def test_missing_section_returns_empty(self): + text = _extract_section_text(self.SAMPLE_MD, "Nonexistent Section") + assert text == "" + + +# --------------------------------------------------------------------------- +# RSS feed builder +# --------------------------------------------------------------------------- + +def _make_episode(**kwargs): + """Create a minimal PodcastEpisode-like object for RSS tests.""" + defaults = dict( + episode_date=date(2026, 4, 10), + paper_title="A Great Paper", + paper_authors="Smith J et al.", + paper_journal="Nature", + paper_year=2026, + pmid="12345678", + paper_url=None, + text_summary="This paper 
found something important.", + audio_file_path=None, + audio_duration_seconds=None, + slack_delivered=True, + selection_justification="Highly relevant to the PI's work.", + ) + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + +class TestBuildFeed: + # --- agent path --- + + def test_returns_valid_xml_root(self): + xml = build_feed("Jane Smith", [], "https://example.com", agent_id="testagent") + assert xml.startswith("1:30" in xml + + def test_no_enclosure_when_no_audio(self): + ep = _make_episode(audio_file_path=None) + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "") + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "Proteins & <Stuff>" in xml + + def test_empty_episodes_list(self): + xml = build_feed("Jane Smith", [], "https://example.com", agent_id="testagent") + assert "" not in xml + + def test_agent_guid_format(self): + ep = _make_episode() + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "testagent-2026-04-10" in xml + + # --- user path --- + + def test_user_feed_url_uses_user_id(self): + uid = "11111111-2222-3333-4444-555555555555" + xml = build_feed("Alice Brown", [], "https://example.com", user_id=uid) + assert f"/podcast/users/{uid}/feed.xml" in xml + + def test_user_feed_has_correct_pi_name(self): + uid = "11111111-2222-3333-4444-555555555555" + xml = build_feed("Alice Brown", [], "https://example.com", user_id=uid) + assert "Alice Brown" in xml + + def test_user_audio_url_uses_user_path(self, tmp_path): + uid = "11111111-2222-3333-4444-555555555555" + audio_file = tmp_path / "2026-04-10.mp3" + audio_file.write_bytes(b"\x00" * 500) + ep = _make_episode(audio_file_path=str(audio_file)) + xml = build_feed("Alice Brown", [ep], "https://example.com", user_id=uid) + assert f"/podcast/users/{uid}/audio/2026-04-10.mp3" in xml + + def test_user_guid_format(self): + uid = "11111111-2222-3333-4444-555555555555" + ep 
= _make_episode() + xml = build_feed("Alice Brown", [ep], "https://example.com", user_id=uid) + assert f"user-{uid}-2026-04-10" in xml + + +# --------------------------------------------------------------------------- +# State helpers — user path +# --------------------------------------------------------------------------- + +class TestUserState: + def test_new_user_has_empty_delivered_set(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + result = get_delivered_pmids_for_user("user-uuid-abc") + assert result == set() + + def test_record_and_retrieve_user_delivery(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery_for_user("user-uuid-abc", "12345") + record_delivery_for_user("user-uuid-abc", "67890") + result = get_delivered_pmids_for_user("user-uuid-abc") + assert result == {"12345", "67890"} + + def test_user_and_agent_state_are_independent(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery("myagent", "11111") + record_delivery_for_user("user-uuid-abc", "22222") + assert get_delivered_pmids("myagent") == {"11111"} + assert get_delivered_pmids_for_user("user-uuid-abc") == {"22222"} + # no cross-contamination + assert "22222" not in get_delivered_pmids("myagent") + assert "11111" not in get_delivered_pmids_for_user("user-uuid-abc") + + def test_duplicate_pmid_not_added_twice(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery_for_user("user-uuid-abc", "99999") + record_delivery_for_user("user-uuid-abc", "99999") + raw = json.loads((tmp_path / "state.json").read_text()) + assert raw["users"]["user-uuid-abc"]["delivered_pmids"].count("99999") == 1 + + def test_atomic_write_leaves_valid_json(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / 
"state.json") + record_delivery_for_user("u1", "aaa") + content = (tmp_path / "state.json").read_text() + parsed = json.loads(content) # must be valid JSON + assert "users" in parsed + + +# --------------------------------------------------------------------------- +# _build_profile_text_from_db +# --------------------------------------------------------------------------- + +class TestBuildProfileTextFromDb: + def _make_user(self, **kwargs): + defaults = dict(name="Dr. Alice", institution="MIT", department="Biology") + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + def _make_profile(self, **kwargs): + defaults = dict( + research_summary="We study protein aggregation.", + disease_areas=["Alzheimer's", "Parkinson's"], + techniques=["cryo-EM", "mass spectrometry"], + experimental_models=["mouse", "iPSC"], + keywords=["proteostasis", "neurodegeneration"], + ) + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + def test_includes_user_name(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "Dr. 
Alice" in text + + def test_includes_research_summary(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "protein aggregation" in text + + def test_includes_disease_areas(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "Alzheimer" in text + + def test_includes_techniques(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "cryo-EM" in text + + def test_handles_none_fields_gracefully(self): + profile = self._make_profile(disease_areas=None, techniques=None, keywords=None) + text = _build_profile_text_from_db(self._make_user(), profile) + assert "protein aggregation" in text # summary still present + + def test_handles_missing_institution(self): + user = self._make_user(institution=None, department=None) + text = _build_profile_text_from_db(user, self._make_profile()) + assert "Dr. Alice" in text