Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/agent/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,20 @@ export async function interactiveSession(
callMaxTokens = 2048; // Short plan output
callSystemPrompt = systemPrompt + '\n\n' + getPlanningPrompt();
}
// Force a final answer: withhold tools so the model must commit to text,
// either on the last turn or once the tool-call budget is spent. Without
// this, models that keep calling tools every turn hit maxTurns with no
// answer (and waste the spend). Opt-in per config.
const onFinalTurn = config.forceAnswerOnFinalTurn && loopCount === maxTurns;
const toolBudgetSpent = config.maxToolCalls != null && turnToolCalls >= config.maxToolCalls;
if ((onFinalTurn || toolBudgetSpent) && callToolDefs.length > 0) {
callToolDefs = [];
callSystemPrompt = systemPrompt + '\n\n' +
(toolBudgetSpent
? `You have used your research budget (${config.maxToolCalls} tool calls) — no more tools are available.`
: 'This is your FINAL turn — no more tools are available.') +
' Based on the research so far, output ONLY the final answer now, in the exact format requested.';
}

// ── Hallucination guard for weak models ──
// Weak / free models (nemotron-ultra, GLM-4, qwen coder, free-profile
Expand Down Expand Up @@ -1492,7 +1506,7 @@ export async function interactiveSession(
if (!hasText && !hasTools && !hasThinking) {
const EMPTY_FALLBACK_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'zai/glm-5.1'];
const nextModel = EMPTY_FALLBACK_MODELS.find(m => m !== config.model && !turnFailedModels.has(m));
if (nextModel && recoveryAttempts < 2) {
if (nextModel && recoveryAttempts < 2 && !config.disableModelFallback) {
recoveryAttempts++;
turnFailedModels.add(config.model);
const oldModel = config.model;
Expand Down Expand Up @@ -1540,7 +1554,7 @@ export async function interactiveSession(
const nextModel = TOOL_USE_FALLBACK_MODELS.find(
m => m !== config.model && !turnFailedModels.has(m),
);
if (nextModel && recoveryAttempts < 2) {
if (nextModel && recoveryAttempts < 2 && !config.disableModelFallback) {
recoveryAttempts++;
turnFailedModels.add(config.model);
const oldModel = config.model;
Expand Down Expand Up @@ -2126,7 +2140,7 @@ export async function interactiveSession(
.filter(p => p.type === 'text' && typeof (p as { text?: string }).text === 'string')
.map(p => (p as { text: string }).text)
.join('');
if (shouldCheckGrounding(lastUserInput || '', assistantText)) {
if (!config.disableGroundingRetry && shouldCheckGrounding(lastUserInput || '', assistantText)) {
const gResult = await checkGrounding(lastUserInput, history, assistantText, client, {
abortSignal: abort.signal,
});
Expand Down
24 changes: 24 additions & 0 deletions src/agent/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,30 @@ export interface AgentConfig {
maxSpendUsd?: number;
/** Show user-visible harness prefetch status lines (interactive UX only). */
showPrefetchStatus?: boolean;
/**
* On the final turn, withhold tools so the model must commit to a text answer
* instead of researching until cut off. For one-shot forecasting/extraction
* callers (e.g. `franklin predict`) where some models never stop calling tools
* and would otherwise hit maxTurns with no answer.
*/
forceAnswerOnFinalTurn?: boolean;
/**
* Hard cap on total tool calls for the turn. Once reached, tools are withheld
* and the model is forced to answer from what it has. Bounds research/cost
* deterministically (`maxTurns` alone doesn't — a loop iteration may make no tool call).
*/
maxToolCalls?: number;
/**
* Disable Franklin's automatic model-switching (empty-response / stalled-intent
* fallbacks). One-shot callers want a clean abstain from the requested model,
* not a silent switch to a different one.
*/
disableModelFallback?: boolean;
/**
* Disable the post-response "ungrounded claims → force a tool-use retry" guard.
* It fights the forced-answer path and pollutes one-shot structured output.
*/
disableGroundingRetry?: boolean;
/** Mid-turn "research-bloat" compaction — summarizes history when a turn
* racks up many tool calls + spend, to cut input-replay cost. Default on;
* set false to disable (the desktop exposes this as a toggle). */
Expand Down
164 changes: 164 additions & 0 deletions src/commands/predict.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/**
* `franklin predict` — Franklin prediction mode (headless).
*
* Runs ONE model as a disciplined forecaster: it researches a single real-world
* event with a tight, read-only toolset (web search, source fetch, Exa, X, live
* prediction markets, a little market data) the way a bettor would before
* putting money down — then commits to a pick with a confidence.
*
* Designed for machine callers (e.g. BlockRun Oracle): with --json it emits a
* single JSON envelope on stdout containing the model's final answer, the full
* tool-call trace (what it searched and what it found), the terminal reason and
* token usage. Human-readable streaming otherwise.
*
* franklin predict --model anthropic/claude-opus-4.8 \
* --question "Who wins the 2026 FIFA World Cup? Pick one country." --json
*/
import { interactiveSession } from '../agent/loop.js';
import type { AgentConfig, StreamEvent, StreamTurnDone } from '../agent/types.js';
import { predictionCapabilities, resetToolSessionState } from '../tools/index.js';
import { loadChain, API_URLS } from '../config.js';
import { resolveModel } from '../ui/model-picker.js';

/**
 * Raw options accepted by {@link predictCommand}.
 *
 * Every field is optional at the type level and numeric settings are carried
 * as strings; `predictCommand` performs the required-field checks and the
 * string-to-number conversion itself.
 */
export interface PredictOptions {
  // ── What to forecast, and with which model ──
  /** The event question to forecast. Trimmed by `predictCommand`; empty is rejected. */
  question?: string;
  /** Model identifier; passed through `resolveModel` before use. Missing is rejected. */
  model?: string;

  // ── Budgets (numeric values as strings) ──
  /** Agent turn budget; `predictCommand` falls back to 8 when absent. */
  maxTurns?: string;
  /** Tool-call budget; `predictCommand` falls back to 6 when absent. */
  maxToolCalls?: string;
  /** USD spend cap; only forwarded (as `maxSpendUsd`) when supplied. */
  maxSpend?: string;

  // ── Output / diagnostics ──
  /** JSON envelope output is used unless this is exactly `false`. */
  json?: boolean;
  /** Forwarded to the agent config as its `debug` flag. */
  debug?: boolean;
}

/**
 * System instructions for prediction mode, one instruction per array element.
 * Passed to the agent as `systemInstructions` by `predictCommand`.
 *
 * The text is part of the program's behavior: it tells the model how to
 * research, how many tool calls to budget, and the exact single-line JSON
 * shape its final message must end with. Edit with care.
 */
const PREDICTION_SYSTEM: string[] = [
// Role and research procedure.
'You are a sharp, disciplined forecasting analyst — think like a professional who is about to put real money on this question.',
'Your job: predict the outcome of ONE real-world event. Before answering you MUST do research the way a bettor would:',
"1. Use web_search (and webfetch / exa tools) for the most CURRENT facts and news — today's real-world state matters far more than your training data.",
'2. Use search_prediction_markets to read the CURRENT market-implied odds (Polymarket, Kalshi, etc.) for this or a closely related question.',
'3. Weigh it: where is the consensus, where might the market be mispriced, what is your edge.',
// Soft research budget stated to the model (the hard cap is enforced via the agent config).
'Budget your research: make AT MOST 4-5 focused tool calls in total. As soon as you have enough to decide, STOP calling tools and output the JSON. Do not keep researching — an answer with light research beats no answer.',
// Required output contract: one minified JSON object at the very end of the final message.
'Your FINAL message must end with EXACTLY ONE single-line minified JSON object and NOTHING after it:',
'{"pick": string, "confidence": number, "rationale": string, "analysis": string, "marketOdds": string}',
'- pick: one option from the question (a short label, e.g. a country, party, bucket, or Yes/No).',
'- confidence: your probability (0-1) that THIS pick is correct.',
'- rationale: one sharp sentence (max 22 words).',
'- analysis: 3-5 sentences citing what your research found, the strongest counter-argument, and why you still land here. No literal newlines inside the string.',
"- marketOdds: what the prediction market currently implies (e.g. 'Polymarket: France 18%'), or 'n/a' if none found.",
'Be decisive. Do not hedge with "it depends".',
];

/**
 * One completed tool call, as recorded in the `trace` array of the JSON
 * envelope that `predictCommand` emits.
 */
interface TraceEntry {
  /** Tool name reported when the call started (`'tool'` if it was never seen). */
  tool: string;
  /** Set from the tool result's `isError` flag; may be absent. */
  isError?: boolean;
  /** Streamed call input, or the start-event preview when no input was streamed. */
  input: string;
  /** Tool output text, truncated to 1500 characters. */
  output: string;
}

/** Agent turn budget used when `maxTurns` is not supplied. */
const DEFAULT_MAX_TURNS = 8;
/** Tool-call budget used when `maxToolCalls` is not supplied. */
const DEFAULT_MAX_TOOL_CALLS = 6;
/** Maximum number of characters of tool output kept per trace entry. */
const TRACE_OUTPUT_LIMIT = 1500;

/**
 * Convert a raw string option to a number, rejecting values the agent loop cannot use.
 *
 * @param raw      The option value as received, or `undefined` when not supplied.
 * @param flag     The CLI flag name, used in the error message.
 * @param expected Human-readable description of an acceptable value.
 * @param isValid  Extra constraint applied to the finite parsed number.
 * @returns The parsed number, or `undefined` when `raw` is absent.
 * @throws RangeError when the value is empty, non-numeric, non-finite or fails `isValid`.
 */
function parseNumericOption(
  raw: string | undefined,
  flag: string,
  expected: string,
  isValid: (value: number) => boolean,
): number | undefined {
  if (raw == null) return undefined;
  const text = String(raw).trim();
  // Number('') is 0, which would silently turn an empty flag into a real limit.
  const value = text === '' ? Number.NaN : Number(text);
  if (!Number.isFinite(value) || !isValid(value)) {
    throw new RangeError(`predict: ${flag} must be ${expected} (got "${text}")`);
  }
  return value;
}

/**
 * Run `franklin predict`: feed one question to a single agent session restricted
 * to the prediction toolset and report the outcome.
 *
 * Output:
 * - JSON mode (default; anything but `options.json === false`): one JSON envelope
 *   line on stdout with the model, question, accumulated text, tool-call trace,
 *   terminal reason, optional error and token usage.
 * - Otherwise: text deltas are streamed to stdout and tool starts / a final error
 *   are written to stderr.
 *
 * Exit status is communicated through `process.exitCode`: 0 when the turn reason is
 * `'completed'`, 1 otherwise — including when a required option is missing or a
 * numeric option is invalid (in which case no session is started).
 *
 * @param options Raw CLI options; numeric settings arrive as strings.
 */
export async function predictCommand(options: PredictOptions): Promise<void> {
  const question = options.question?.trim();
  if (!question) {
    process.stderr.write('predict: --question is required\n');
    process.exitCode = 1;
    return;
  }
  if (!options.model) {
    process.stderr.write('predict: --model is required\n');
    process.exitCode = 1;
    return;
  }

  // Validate budgets up front. A NaN or fractional maxTurns can never equal the
  // loop counter, so the forced-answer turn would silently never trigger.
  let maxTurns: number;
  let maxToolCalls: number;
  let maxSpendUsd: number | undefined;
  try {
    maxTurns =
      parseNumericOption(
        options.maxTurns,
        '--max-turns',
        'a positive integer',
        (n) => Number.isInteger(n) && n > 0,
      ) ?? DEFAULT_MAX_TURNS;
    maxToolCalls =
      parseNumericOption(
        options.maxToolCalls,
        '--max-tool-calls',
        'a non-negative integer',
        (n) => Number.isInteger(n) && n >= 0,
      ) ?? DEFAULT_MAX_TOOL_CALLS;
    maxSpendUsd = parseNumericOption(
      options.maxSpend,
      '--max-spend',
      'a non-negative number',
      (n) => n >= 0,
    );
  } catch (err: unknown) {
    process.stderr.write(`${err instanceof Error ? err.message : String(err)}\n`);
    process.exitCode = 1;
    return;
  }

  const chain = loadChain();
  const apiUrl = API_URLS[chain];
  const model = resolveModel(options.model);
  const asJson = options.json !== false;

  resetToolSessionState();

  const agentConfig: AgentConfig = {
    model,
    apiUrl,
    chain,
    systemInstructions: PREDICTION_SYSTEM,
    capabilities: predictionCapabilities,
    maxTurns,
    permissionMode: 'trust',
    debug: !!options.debug,
    showPrefetchStatus: false,
    // Governance for one-shot forecasting: bound research by tool-call count and
    // force an answer; don't silently switch models or fight a grounding retry.
    // The tool budget (DEFAULT_MAX_TOOL_CALLS) is the real research limiter;
    // maxTurns (DEFAULT_MAX_TURNS) is just slack above it for a thinking turn +
    // the forced-answer turn.
    forceAnswerOnFinalTurn: true,
    maxToolCalls,
    disableModelFallback: true,
    disableGroundingRetry: true,
    ...(maxSpendUsd != null ? { maxSpendUsd } : {}),
  };

  // NOTE(review): finalText concatenates every text delta of the session, not
  // only the last assistant message — confirm this is what consumers expect.
  let finalText = '';
  let turnReason: StreamTurnDone['reason'] = 'completed';
  let turnError: string | undefined;
  let inputTokens = 0;
  let outputTokens = 0;
  const trace: TraceEntry[] = [];
  // Per-call bookkeeping keyed by capability id, filled in as events stream in.
  const nameById = new Map<string, string>();
  const inputById = new Map<string, string>();
  const previewById = new Map<string, string>();

  // The session asks for input repeatedly; hand over the question once, then
  // return null for every later request.
  let delivered = false;
  const getInput = async (): Promise<string | null> => {
    if (delivered) return null;
    delivered = true;
    return question;
  };

  await interactiveSession(agentConfig, getInput, (event: StreamEvent) => {
    switch (event.kind) {
      case 'text_delta':
        finalText += event.text;
        if (!asJson) process.stdout.write(event.text);
        break;
      case 'capability_start':
        nameById.set(event.id, event.name);
        inputById.set(event.id, '');
        if (event.preview) previewById.set(event.id, event.preview);
        if (!asJson) process.stderr.write(`\n · ${event.name}${event.preview ? ` ${event.preview}` : ''}\n`);
        break;
      case 'capability_input_delta':
        inputById.set(event.id, (inputById.get(event.id) || '') + event.delta);
        break;
      case 'capability_done': {
        const tool = nameById.get(event.id) || 'tool';
        // Prefer the streamed input; fall back to the preview from the start event.
        const input = (inputById.get(event.id) || '').trim() || previewById.get(event.id) || '';
        const output = event.result?.fullOutput || event.result?.output || '';
        trace.push({ tool, input, output: output.slice(0, TRACE_OUTPUT_LIMIT), isError: event.result?.isError });
        break;
      }
      case 'usage':
        // Keeps the most recent usage event. NOTE(review): assumes the loop
        // reports running totals; if events are per-call these should be summed.
        inputTokens = event.inputTokens;
        outputTokens = event.outputTokens;
        break;
      case 'turn_done':
        turnReason = event.reason;
        turnError = event.error;
        break;
    }
  });

  if (asJson) {
    const envelope = {
      model,
      question,
      finalText: finalText.trim(),
      trace,
      turnReason,
      ...(turnError ? { error: turnError } : {}),
      usage: { inputTokens, outputTokens },
    };
    process.stdout.write(JSON.stringify(envelope) + '\n');
  } else if (turnReason !== 'completed' && turnError) {
    process.stderr.write(`\n${turnError}\n`);
  }

  process.exitCode = turnReason === 'completed' ? 0 : 1;
}
13 changes: 13 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import { uninitCommand } from './commands/uninit.js';
import { proxyCommand } from './commands/proxy.js';
import { buildTaskCommand } from './commands/task.js';
import { buildContentCommand } from './commands/content.js';
import { predictCommand } from './commands/predict.js';

import { VERSION as version } from './config.js';

Expand Down Expand Up @@ -90,6 +91,18 @@ program
.option('--debug', 'Enable debug logging')
.action((options) => proxyCommand({ ...options, version }));

// `franklin predict` — headless one-shot forecasting command. All parsed options
// are handed unchanged to predictCommand.
program
.command('predict')
.description('Prediction mode — forecast one real-world event with a research-only toolset (web/markets), headless')
// Model and question are mandatory (predictCommand also re-checks both).
.requiredOption('-m, --model <model>', 'Model to use (e.g. anthropic/claude-opus-4.8, openai/gpt-5.5)')
.requiredOption('-q, --question <text>', 'The event question to forecast (include the allowed options)')
// Budgets are passed along as strings; the defaults here are the strings '8' and '6'.
.option('--max-turns <n>', 'Max agent turns before forcing an answer', '8')
.option('--max-tool-calls <n>', 'Max tool calls before forcing an answer', '6')
.option('--max-spend <usd>', 'Hard USD cap on this prediction run')
// Negated flag: predictCommand emits the JSON envelope unless `json` is exactly false.
.option('--no-json', 'Human-readable streaming instead of a JSON envelope')
.option('--debug', 'Enable debug logging')
.action((options) => predictCommand(options));

program
.command('init')
.description('Configure franklin auto-start (writes ~/.claude/settings.json + installs LaunchAgent on macOS)')
Expand Down
27 changes: 27 additions & 0 deletions src/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -234,4 +234,31 @@ export {
detachCapability,
};

/**
 * "Franklin prediction mode" toolset.
 *
 * A deliberately tight, research-only capability set for forecasting a single
 * real-world event the way a careful bettor would: gather current facts, read
 * sources, check live prediction-market odds and a little market data — then
 * decide. Everything else (filesystem, shell, media generation, swaps/trade
 * execution, phone/voice, GPU sandbox, posting) is intentionally excluded:
 * a forecaster looks things up; it does not act on the world or spend beyond
 * the cheap read calls these tools make.
 *
 * Used by the `franklin predict` command and reusable by any headless caller
 * (e.g. franklin.bet) that wants a grounded prediction.
 */
export const predictionCapabilities: CapabilityHandler[] = [
// ── General web research ──
webSearchCapability, // web_search — current news & facts
webFetchCapability, // webfetch — read a specific source URL
exaSearchCapability, // exa search — higher-quality web research
exaAnswerCapability, // exa answer — direct sourced answers
exaReadUrlsCapability, // exa read — pull full text of found URLs
searchXCapability, // search X — live sentiment / breaking signal
// ── Markets ──
predictionMarketCapability, // search_prediction_markets — live implied odds
tradingSignalCapability, // market signal/indicators (for market-type events)
tradingMarketCapability, // market snapshot data
defiLlamaPriceCapability, // token price lookup (crypto-type events)
];

export { createSubAgentCapability } from './subagent.js';
Loading