From d6bce3d5d5e72b1e9347a55471d5bbcdd2ec5df8 Mon Sep 17 00:00:00 2001 From: ozymandiashh <234437643+ozymandiashh@users.noreply.github.com> Date: Tue, 23 Jun 2026 14:56:29 +0300 Subject: [PATCH] fix(copilot): read VS Code chatSessions for token cost (#555) VS Code GitHub Copilot Chat users with no OTel agent-traces.db and no ~/.copilot/session-state/ saw $0.00 cost: codeburn never read VS Code's core chat persistence, the only on-disk source carrying their token counts. Add a fourth Copilot source that reads the VS Code chat delta-journals at workspaceStorage/<hash>/chatSessions/*.jsonl and globalStorage/emptyWindowChatSessions/*.jsonl. The files are a kind:0 snapshot / kind:1 path-set / kind:2 array-append journal; we replay it (prototype-pollution-safe: __proto__/prototype/constructor segments are rejected and containers use Object.create(null)) and read each request's result.metadata.promptTokens / outputTokens (falling back to completionTokens) and resolvedModel for pricing. Dedup so users with multiple sources are not double-counted: prefer OTel (skip chatSessions discovery when an OTel source is present), and skip a workspace's legacy GitHub.copilot-chat/transcripts when that workspace has chatSessions. New env overrides CODEBURN_COPILOT_GLOBAL_STORAGE_DIR and the existing CODEBURN_COPILOT_WS_STORAGE_DIR keep discovery testable. Tests cover the reporter's real request shape (promptTokens 32543 / outputTokens 60 -> non-zero cost), empty sessions, emptyWindow discovery, append-then-edit replay, requestId dedup, prototype-pollution paths, the transcript skip, and the OTel-prefer skip. 
--- docs/providers/copilot.md | 25 +- src/providers/copilot.ts | 488 ++++++++++++++++++++++++++++++-- tests/providers/copilot.test.ts | 217 +++++++++++++- 3 files changed, 694 insertions(+), 36 deletions(-) diff --git a/docs/providers/copilot.md b/docs/providers/copilot.md index b667147e..f78b62f6 100644 --- a/docs/providers/copilot.md +++ b/docs/providers/copilot.md @@ -1,23 +1,25 @@ # Copilot -GitHub Copilot Chat (CLI and VS Code extension transcripts). +GitHub Copilot Chat (CLI, VS Code core chat sessions, and VS Code extension transcripts). - **Source:** `src/providers/copilot.ts` - **Loading:** eager (`src/providers/index.ts:3`) -- **Test:** `tests/providers/copilot.test.ts` (401 lines) +- **Test:** `tests/providers/copilot.test.ts` ## Where it reads from -Two JSONL locations plus an optional OpenTelemetry SQLite source (see below). All -discovered sources are walked on every run; results merge and dedupe. +Three JSONL locations plus an optional OpenTelemetry SQLite source (see below). OTel is +preferred when present; chatSessions are only discovered when no OTel source is found. +Other discovered sources are walked on every run; results merge and dedupe. 1. **Legacy CLI sessions:** `~/.copilot/session-state/` -2. **VS Code transcripts:** `~/Library/Application Support/Code/User/workspaceStorage/<hash>/GitHub.copilot-chat/transcripts/` and equivalents on Windows / Linux -3. **OTel SQLite store:** VS Code Copilot Chat's `agent-traces.db` (see the OTel section). Preferred when present because it carries full input / output / cache token counts; the JSONL sources only record output tokens. +2. **VS Code core chat sessions:** `~/Library/Application Support/Code/User/workspaceStorage/<hash>/chatSessions/*.jsonl` plus `~/Library/Application Support/Code/User/globalStorage/emptyWindowChatSessions/*.jsonl` and equivalents on Windows / Linux +3. 
**VS Code transcripts:** `~/Library/Application Support/Code/User/workspaceStorage/<hash>/GitHub.copilot-chat/transcripts/` and equivalents on Windows / Linux +4. **OTel SQLite store:** VS Code Copilot Chat's `agent-traces.db` (see the OTel section). Preferred when present because it carries full input / output / cache token counts; legacy JSONL sources only record output tokens. ## Storage format -JSONL in the first two locations (schemas differ; the parser switches by detecting which schema the first event uses), and a SQLite DB for the OTel source. +JSONL in the first three locations (schemas differ; the parser switches by source type / event shape), and a SQLite DB for the OTel source. VS Code core chat sessions use a delta journal: `kind:0` sets the root object, `kind:1` writes a value at path `k`, and `kind:2` appends items to an array path. ## OpenTelemetry (OTel) source @@ -26,6 +28,11 @@ breakdowns (input, output, cache-read, cache-creation) from it, which the JSONL not record. Discovery is skipped with `CODEBURN_COPILOT_DISABLE_OTEL=1`, and the DB path can be overridden with `CODEBURN_COPILOT_OTEL_DB`. +If OTel discovery finds at least one source, workspace `chatSessions/*.jsonl` and +`emptyWindowChatSessions/*.jsonl` are skipped. Those journals can mirror the same Copilot +turns under IDs that do not match OTel turn IDs, so CodeBurn prefers the richer OTel data +instead of trying to dedupe across stores. + - **Requires Node 22+.** The OTel source uses the built-in `node:sqlite` module (the same backend as Cursor / OpenCode). On Node 20, or if the DB is missing / locked / corrupt / wrong-schema, OTel is skipped and the JSONL/transcript sources are used as a fallback. @@ -43,7 +50,9 @@ None for the JSONL sources. The OTel source uses a durable cache (see above). ## Deduplication -Per `messageId` in both formats (`copilot.ts:118` for legacy, `copilot.ts:245` for transcripts). +Legacy JSONL and transcript sessions dedupe per `messageId`. 
Core chat sessions dedupe per `copilot-chatsession:<sessionId>:<requestId>`, and are not discovered when an OTel source is present. + +If a workspace hash contains at least one `chatSessions/*.jsonl` file, the provider skips that hash's legacy `GitHub.copilot-chat/transcripts/` directory. The core chat session journal is the modern token-bearing source for the same conversations, so reading both would inflate call counts. ## Model inference diff --git a/src/providers/copilot.ts b/src/providers/copilot.ts index 281af4fb..ce1263d9 100644 --- a/src/providers/copilot.ts +++ b/src/providers/copilot.ts @@ -8,11 +8,11 @@ // cache-read tokens, and cache-creation tokens are never written there, so // CodeBurn underreports Copilot costs by 60-80%. // -// This modified version adds a SECOND data source: VS Code Copilot Chat's -// OTel SQLite store (agent-traces.db). When present, it contains full -// per-LLM-call token breakdowns (input, output, cache_read, cache_creation) -// from the OpenTelemetry GenAI semantic conventions. We prefer OTel data -// when available and fall back to the original JSONL parsing. +// This modified version adds VS Code sources that can carry fuller token +// data: the OTel SQLite store (agent-traces.db), VS Code core chatSessions +// journals, and legacy extension transcripts. OTel and chatSessions contain +// input/output token breakdowns for Copilot Chat users; legacy JSONL remains +// a fallback when richer sources are absent. 
// // HOW TO ENABLE THE OTEL SQLITE STORE: // TWO settings must both be enabled in VS Code settings.json: @@ -37,11 +37,13 @@ // ENVIRONMENT VARIABLES: // CODEBURN_COPILOT_OTEL_DB — Override the agent-traces.db path // CODEBURN_COPILOT_DISABLE_OTEL=1 — Skip OTel entirely, use only JSONL +// CODEBURN_COPILOT_WS_STORAGE_DIR — Override VS Code workspaceStorage +// CODEBURN_COPILOT_GLOBAL_STORAGE_DIR — Override VS Code globalStorage // // ARCHITECTURE: -// discoverSessions() returns BOTH OTel sessions (one per conversation_id) -// and JSONL sessions. The OTel sessions are deduped against JSONL by -// conversation ID so we don't double-count. OTel sessions carry the full +// discoverSessions() returns OTel sessions and legacy JSONL sessions. When +// OTel is present, VS Code core chatSessions are skipped because they mirror +// the same Copilot turns under different IDs. OTel sessions carry the full // token breakdown; JSONL sessions only carry output tokens (the original // behaviour, as a fallback). // @@ -188,6 +190,9 @@ type CopilotEvent = | { type: 'assistant.message'; data: AssistantMessageData; timestamp?: string } | { type: 'subagent.selected'; data: SubagentSelectedData; timestamp?: string } +type ChatJournalPathSegment = string | number +type ChatSessionRequest = Record + // --------------------------------------------------------------------------- // Types for OTel span rows from agent-traces.db // --------------------------------------------------------------------------- @@ -334,6 +339,205 @@ function epochToISO(epoch: number): string { return new Date(ms).toISOString() } +function timestampToISO(raw: unknown): string { + if (typeof raw === 'number' && Number.isFinite(raw) && raw > 0) { + return epochToISO(raw) + } + if (typeof raw !== 'string') return '' + const trimmed = raw.trim() + if (!trimmed) return '' + if (/^\d+(\.\d+)?$/.test(trimmed)) { + return epochToISO(Number(trimmed)) + } + const parsed = Date.parse(trimmed) + return Number.isNaN(parsed) ? 
'' : new Date(parsed).toISOString() +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value) +} + +function isReplayContainer(value: unknown): value is object { + return typeof value === 'object' && value !== null +} + +function createReplayObject(): Record { + return Object.create(null) as Record +} + +const FORBIDDEN_CHAT_JOURNAL_KEYS = new Set(['__proto__', 'prototype', 'constructor']) + +function parseChatJournalPath(rawPath: unknown, fallback?: ChatJournalPathSegment[]): ChatJournalPathSegment[] | null { + const value = rawPath === undefined ? fallback : rawPath + if (!Array.isArray(value)) return null + + const path: ChatJournalPathSegment[] = [] + for (const segment of value) { + if (typeof segment === 'number') { + if (!Number.isInteger(segment) || segment < 0) return null + path.push(segment) + continue + } + if (typeof segment === 'string') { + if (FORBIDDEN_CHAT_JOURNAL_KEYS.has(segment)) return null + path.push(segment) + continue + } + return null + } + return path +} + +function getReplayValue(container: object, segment: ChatJournalPathSegment): unknown { + return (container as Record)[String(segment)] +} + +function setReplayValue(container: object, segment: ChatJournalPathSegment, value: unknown): void { + ;(container as Record)[String(segment)] = value +} + +function createContainerForNext(segment: ChatJournalPathSegment): unknown[] | Record { + return typeof segment === 'number' ? [] : createReplayObject() +} + +function ensureReplayParent(root: object, path: ChatJournalPathSegment[]): object | null { + let current: object = root + for (let i = 0; i < path.length - 1; i++) { + const segment = path[i]! + const nextSegment = path[i + 1]! 
+ let child = getReplayValue(current, segment) + if (!isReplayContainer(child)) { + const created = createContainerForNext(nextSegment) + setReplayValue(current, segment, created) + current = created + continue + } + current = child + } + return current +} + +function applyChatJournalSet(root: unknown, path: ChatJournalPathSegment[], value: unknown): unknown { + if (path.length === 0) return value + + const workingRoot = isReplayContainer(root) ? root : createReplayObject() + const parent = ensureReplayParent(workingRoot, path) + if (!parent) return workingRoot + setReplayValue(parent, path[path.length - 1]!, value) + return workingRoot +} + +function applyChatJournalAppend(root: unknown, path: ChatJournalPathSegment[], items: unknown[]): unknown { + const workingRoot = isReplayContainer(root) ? root : createReplayObject() + + if (path.length === 0) { + if (Array.isArray(workingRoot)) { + for (const item of items) workingRoot.push(item) + } + return workingRoot + } + + const parent = ensureReplayParent(workingRoot, path) + if (!parent) return workingRoot + + const last = path[path.length - 1]! + let target = getReplayValue(parent, last) + const targetArray: unknown[] = Array.isArray(target) ? 
target : [] + if (target !== targetArray) { + setReplayValue(parent, last, targetArray) + } + for (const item of items) targetArray.push(item) + return workingRoot +} + +function replayChatSessionJournal(content: string): unknown { + let root: unknown = createReplayObject() + const lines = content.split('\n').filter((l) => l.trim()) + + for (const line of lines) { + let entry: unknown + try { + entry = JSON.parse(line) as unknown + } catch { + continue + } + if (!isRecord(entry)) continue + + const kind = entry['kind'] + if (kind === 0) { + root = entry['v'] + continue + } + + if (kind === 1) { + const path = parseChatJournalPath(entry['k']) + if (!path) continue + root = applyChatJournalSet(root, path, entry['v']) + continue + } + + if (kind === 2) { + const hasPath = Object.prototype.hasOwnProperty.call(entry, 'k') + const path = parseChatJournalPath(hasPath ? entry['k'] : undefined, ['requests']) + const items = Array.isArray(entry['v']) ? entry['v'] : [] + if (!path) continue + root = applyChatJournalAppend(root, path, items) + } + } + + return root +} + +function numberOrZero(raw: unknown): number { + return typeof raw === 'number' && Number.isFinite(raw) && raw > 0 ? raw : 0 +} + +function readString(raw: unknown): string { + return typeof raw === 'string' ? 
raw : '' +} + +function modelFromChatSessionRequest(req: ChatSessionRequest, metadata: Record): string { + const resolved = readString(metadata['resolvedModel']) + if (resolved) return resolved + + const modelId = readString(req['modelId']).replace(/^copilot\//, '') + return modelId || 'unknown' +} + +function extractChatSessionTools(metadata: Record): string[] { + const rounds = metadata['toolCallRounds'] + if (!Array.isArray(rounds)) return [] + + const names = new Set() + const addName = (raw: unknown): void => { + if (typeof raw === 'string' && raw.trim()) names.add(normalizeTool(raw)) + } + const addFromRecord = (record: Record): void => { + addName(record['toolName']) + addName(record['name']) + addName(record['tool']) + } + + for (const round of rounds) { + if (!isRecord(round)) continue + addFromRecord(round) + + for (const key of ['tools', 'toolCalls', 'toolRequests']) { + const entries = round[key] + if (!Array.isArray(entries)) continue + for (const entry of entries) { + if (typeof entry === 'string') { + addName(entry) + } else if (isRecord(entry)) { + addFromRecord(entry) + } + } + } + } + + return [...names] +} + /** * Extract a shell command string from an OTel execute_tool span's * `gen_ai.tool.call.arguments` attribute. The attribute is a JSON-encoded @@ -571,6 +775,71 @@ function createJsonlParser( } } +function createChatSessionParser( + source: SessionSource, + seenKeys: Set +): SessionParser { + return { + async *parse(): AsyncGenerator { + const content = await readSessionFile(source.path) + if (!content) return + + const root = replayChatSessionJournal(content) + if (!isRecord(root)) return + + const sessionId = readString(root['sessionId']) || basename(source.path, '.jsonl') + const sessionCreatedAt = timestampToISO(root['creationDate']) + const requests = Array.isArray(root['requests']) ? 
root['requests'] : [] + + for (let index = 0; index < requests.length; index++) { + const rawReq = requests[index] + if (!isRecord(rawReq)) continue + + const result = rawReq['result'] + const resultRecord = isRecord(result) ? result : null + const rawMetadata = resultRecord?.['metadata'] + const metadata = isRecord(rawMetadata) ? rawMetadata : createReplayObject() + + const inputTokens = numberOrZero(metadata['promptTokens']) + const metadataOutputTokens = numberOrZero(metadata['outputTokens']) + const outputTokens = metadataOutputTokens || numberOrZero(rawReq['completionTokens']) + + if (inputTokens === 0 && outputTokens === 0) continue + + const requestId = readString(rawReq['requestId']) || `request-${index}` + const dedupKey = `copilot-chatsession:${sessionId}:${requestId}` + if (seenKeys.has(dedupKey)) continue + seenKeys.add(dedupKey) + + const model = modelFromChatSessionRequest(rawReq, metadata) + const costUSD = calculateCost(model, inputTokens, outputTokens, 0, 0, 0) + const timestamp = timestampToISO(rawReq['timestamp']) || sessionCreatedAt + + yield { + provider: 'copilot', + sessionId, + project: source.project, + model, + inputTokens, + outputTokens, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, + costUSD, + tools: extractChatSessionTools(metadata), + bashCommands: [], + timestamp, + speed: 'standard' as const, + deduplicationKey: dedupKey, + userMessage: '', + } + } + }, + } +} + // --------------------------------------------------------------------------- // OTel SQLite parser — reads agent-traces.db for FULL token data // --------------------------------------------------------------------------- @@ -820,10 +1089,18 @@ interface JsonlSessionSource extends SessionSource { sourceType: 'jsonl' } +interface ChatSessionSource extends SessionSource { + sourceType: 'chatsession' +} + function isOtelSource(source: SessionSource): source is OTelSessionSource { return (source 
as OTelSessionSource).sourceType === 'otel' } +function isChatSessionSource(source: SessionSource): source is ChatSessionSource { + return (source as ChatSessionSource).sourceType === 'chatsession' +} + // --------------------------------------------------------------------------- // Session discovery: JSONL (original) // --------------------------------------------------------------------------- @@ -921,6 +1198,140 @@ export function getVSCodeWorkspaceStorageDirs(home: string, os: string): string[ ] } +export function getVSCodeGlobalStorageDirs(home: string, os: string): string[] { + const j = os === 'win32' ? win32.join : posix.join + if (os === 'darwin') { + return [ + j(home, 'Library', 'Application Support', 'Code', 'User', 'globalStorage'), + j(home, 'Library', 'Application Support', 'Code - Insiders', 'User', 'globalStorage'), + j(home, 'Library', 'Application Support', 'VSCodium', 'User', 'globalStorage'), + ] + } + if (os === 'linux') { + return [ + j(home, '.config', 'Code', 'User', 'globalStorage'), + j(home, '.config', 'Code - Insiders', 'User', 'globalStorage'), + j(home, '.config', 'VSCodium', 'User', 'globalStorage'), + ] + } + return [ + j(home, 'AppData', 'Roaming', 'Code', 'User', 'globalStorage'), + j(home, 'AppData', 'Roaming', 'Code - Insiders', 'User', 'globalStorage'), + j(home, 'AppData', 'Roaming', 'VSCodium', 'User', 'globalStorage'), + ] +} + +async function resolveWorkspaceProject(wsDir: string, hashDir: string): Promise { + let project = hashDir + try { + const wsJson = await readSessionFile(join(wsDir, hashDir, 'workspace.json')) + if (wsJson) { + const data = JSON.parse(wsJson) as { folder?: string } + if (typeof data.folder === 'string') { + // folder is a URI like 'file:///home/user/myapp' or 'file:///C:/Users/...' 
+ const folder = data.folder.replace(/^file:\/\//, '').replace(/\/+$/, '') + const name = basename(folder) + if (name) project = name + } + } + } catch { + // workspace.json may be absent or malformed + } + return project +} + +async function hasChatSessionFiles(chatSessionsDir: string): Promise { + let files: string[] + try { + files = await readdir(chatSessionsDir) + } catch { + return false + } + + for (const file of files) { + if (!file.endsWith('.jsonl')) continue + const s = await stat(join(chatSessionsDir, file)).catch(() => null) + if (s?.isFile()) return true + } + return false +} + +// --------------------------------------------------------------------------- +// Session discovery: VS Code core chatSessions +// --------------------------------------------------------------------------- + +async function discoverWorkspaceChatSessions( + workspaceStorageDirs: string[] +): Promise { + const sources: ChatSessionSource[] = [] + + for (const wsDir of workspaceStorageDirs) { + let hashDirs: string[] + try { + hashDirs = await readdir(wsDir) + } catch { + continue + } + + for (const hashDir of hashDirs) { + const chatSessionsDir = join(wsDir, hashDir, 'chatSessions') + let files: string[] + try { + files = await readdir(chatSessionsDir) + } catch { + continue + } + + const project = await resolveWorkspaceProject(wsDir, hashDir) + for (const file of files) { + if (!file.endsWith('.jsonl')) continue + const path = join(chatSessionsDir, file) + const s = await stat(path).catch(() => null) + if (!s?.isFile()) continue + sources.push({ + path, + project, + provider: 'copilot', + sourceType: 'chatsession', + }) + } + } + } + + return sources +} + +async function discoverEmptyWindowChatSessions( + globalStorageDirs: string[] +): Promise { + const sources: ChatSessionSource[] = [] + + for (const globalDir of globalStorageDirs) { + const chatSessionsDir = join(globalDir, 'emptyWindowChatSessions') + let files: string[] + try { + files = await readdir(chatSessionsDir) + } 
catch { + continue + } + + for (const file of files) { + if (!file.endsWith('.jsonl')) continue + const path = join(chatSessionsDir, file) + const s = await stat(path).catch(() => null) + if (!s?.isFile()) continue + sources.push({ + path, + project: 'copilot-chat', + provider: 'copilot', + sourceType: 'chatsession', + }) + } + } + + return sources +} + // --------------------------------------------------------------------------- // Session discovery: VS Code workspace transcripts // --------------------------------------------------------------------------- @@ -944,24 +1355,11 @@ async function discoverTranscriptSessions( } for (const hashDir of hashDirs) { - const transcriptsDir = join(wsDir, hashDir, 'GitHub.copilot-chat', 'transcripts') + const chatSessionsDir = join(wsDir, hashDir, 'chatSessions') + if (await hasChatSessionFiles(chatSessionsDir)) continue - // Resolve project name from workspace.json - let project = hashDir - try { - const wsJson = await readSessionFile(join(wsDir, hashDir, 'workspace.json')) - if (wsJson) { - const data = JSON.parse(wsJson) as { folder?: string } - if (typeof data.folder === 'string') { - // folder is a URI like 'file:///home/user/myapp' or 'file:///C:/Users/...' 
- const folder = data.folder.replace(/^file:\/\//, '').replace(/\/+$/, '') - const name = basename(folder) - if (name) project = name - } - } - } catch { - // workspace.json may be absent or malformed - } + const transcriptsDir = join(wsDir, hashDir, 'GitHub.copilot-chat', 'transcripts') + const project = await resolveWorkspaceProject(wsDir, hashDir) let transcriptFiles: string[] try { @@ -987,7 +1385,11 @@ async function discoverTranscriptSessions( return sources } -export function createCopilotProvider(sessionStateDir?: string, workspaceStorageDir?: string): Provider { +export function createCopilotProvider( + sessionStateDir?: string, + workspaceStorageDir?: string, + globalStorageDir?: string +): Provider { // jsonlDir is resolved lazily inside discoverSessions so that env-var // overrides set after module load (e.g. in tests) are respected. @@ -1005,6 +1407,13 @@ export function createCopilotProvider(sessionStateDir?: string, workspaceStorage return getVSCodeWorkspaceStorageDirs(homedir(), platform()) } + function getGlobalDirs(): string[] { + if (globalStorageDir !== undefined) return [globalStorageDir] + const envDir = process.env['CODEBURN_COPILOT_GLOBAL_STORAGE_DIR'] + if (envDir) return [envDir] + return getVSCodeGlobalStorageDirs(homedir(), platform()) + } + return { name: 'copilot', displayName: 'Copilot', @@ -1023,6 +1432,7 @@ export function createCopilotProvider(sessionStateDir?: string, workspaceStorage async discoverSessions(): Promise { const sources: SessionSource[] = [] + let discoveredOtel = false // 1. 
Discover OTel sessions (preferred — full token data) const disableOtel = process.env['CODEBURN_COPILOT_DISABLE_OTEL'] === '1' @@ -1031,6 +1441,7 @@ export function createCopilotProvider(sessionStateDir?: string, workspaceStorage if (dbPath) { try { const otelSources = await discoverOtelSessions(dbPath) + discoveredOtel = otelSources.length > 0 sources.push(...otelSources) } catch { // OTel discovery failed — fall through to JSONL @@ -1047,7 +1458,27 @@ export function createCopilotProvider(sessionStateDir?: string, workspaceStorage // JSONL discovery failed } - // 3. Discover VS Code workspace transcript sessions + // Prefer OTel over chatSessions: they can mirror the same turns under + // incompatible IDs, and OTel carries richer token/cache data. + if (!discoveredOtel) { + // 3. Discover VS Code core chatSessions journals + try { + const chatSessionSources = await discoverWorkspaceChatSessions(getWsDirs()) + sources.push(...chatSessionSources) + } catch { + // Workspace chatSessions discovery failed + } + + // 4. Discover VS Code empty-window chatSessions journals + try { + const emptyWindowSources = await discoverEmptyWindowChatSessions(getGlobalDirs()) + sources.push(...emptyWindowSources) + } catch { + // Empty-window chatSessions discovery failed + } + } + + // 5. 
Discover VS Code workspace transcript sessions try { const transcriptSources = await discoverTranscriptSessions(getWsDirs()) sources.push(...transcriptSources) @@ -1069,6 +1500,9 @@ export function createCopilotProvider(sessionStateDir?: string, workspaceStorage if (isOtelSource(source)) { return createOtelParser(source, seenKeys) } + if (isChatSessionSource(source)) { + return createChatSessionParser(source, seenKeys) + } return createJsonlParser(source, seenKeys) }, } diff --git a/tests/providers/copilot.test.ts b/tests/providers/copilot.test.ts index 671fb28e..89cb4236 100644 --- a/tests/providers/copilot.test.ts +++ b/tests/providers/copilot.test.ts @@ -4,7 +4,7 @@ import { join, posix, win32 } from 'path' import { tmpdir } from 'os' import { createRequire } from 'node:module' -import { copilot, createCopilotProvider, getVSCodeWorkspaceStorageDirs } from '../../src/providers/copilot.js' +import { copilot, createCopilotProvider, getVSCodeGlobalStorageDirs, getVSCodeWorkspaceStorageDirs } from '../../src/providers/copilot.js' import { isSqliteAvailable } from '../../src/sqlite.js' import type { ParsedProviderCall } from '../../src/providers/types.js' @@ -63,6 +63,34 @@ function transcriptAssistantMessage(opts: { messageId: string; content?: string; }) } +function chatSessionSampleRequest(overrides: Record = {}) { + return { + requestId: 'request_8c8ce017-6e3f-460a-9931-5a16825d231a', + modelId: 'copilot/claude-sonnet-4.6', + completionTokens: 490, + result: { + metadata: { + promptTokens: 32543, + outputTokens: 60, + resolvedModel: 'claude-sonnet-4-6', + toolCallRounds: [{ thinking: { tokens: 0 }, modelId: 'claude-sonnet-4.6' }], + agentId: 'github.copilot.editsAgent', + }, + }, + ...overrides, + } +} + +async function createChatSessionFile(filePath: string, entries: unknown[]) { + await writeFile(filePath, entries.map(entry => JSON.stringify(entry)).join('\n') + '\n') +} + +async function collectCalls(source: { path: string; project: string; provider: string; 
sourceType?: string }, seenKeys = new Set()) { + const calls: ParsedProviderCall[] = [] + for await (const call of copilot.createSessionParser(source, seenKeys).parse()) calls.push(call) + return calls +} + describe('copilot provider - JSONL parsing', () => { beforeEach(async () => { tmpDir = await mkdtemp(join(tmpdir(), 'copilot-test-')) @@ -362,6 +390,181 @@ describe('copilot provider - JSONL parsing', () => { }) }) +describe('copilot provider - chatSessions parsing', () => { + beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), 'copilot-chatsessions-test-')) + vi.stubEnv('CODEBURN_COPILOT_DISABLE_OTEL', '1') + }) + + afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }) + vi.unstubAllEnvs() + }) + + it('parses sample journal token counts and cost', async () => { + const filePath = join(tmpDir, 'sample.jsonl') + await createChatSessionFile(filePath, [ + { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-session-1', requests: [] } }, + { kind: 2, k: ['requests'], v: [chatSessionSampleRequest()] }, + ]) + + const calls = await collectCalls({ path: filePath, project: 'myproject', provider: 'copilot', sourceType: 'chatsession' }) + + expect(calls).toHaveLength(1) + expect(calls[0]!.inputTokens).toBe(32543) + expect(calls[0]!.outputTokens).toBe(60) + expect(calls[0]!.model).toBe('claude-sonnet-4-6') + expect(calls[0]!.costUSD).toBeGreaterThan(0) + }) + + it('returns no calls for an empty reconstructed requests array', async () => { + const filePath = join(tmpDir, 'empty.jsonl') + await createChatSessionFile(filePath, [ + { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-empty', requests: [] } }, + ]) + + const calls = await collectCalls({ path: filePath, project: 'myproject', provider: 'copilot', sourceType: 'chatsession' }) + + expect(calls).toHaveLength(0) + }) + + it('discovers and parses emptyWindowChatSessions from globalStorage', async () => { + const globalDir = join(tmpDir, 
'globalStorage') + const emptyWindowDir = join(globalDir, 'emptyWindowChatSessions') + await mkdir(emptyWindowDir, { recursive: true }) + const filePath = join(emptyWindowDir, 'empty-window.jsonl') + await createChatSessionFile(filePath, [ + { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'empty-window-session', requests: [] } }, + { kind: 2, k: ['requests'], v: [chatSessionSampleRequest()] }, + ]) + + const provider = createCopilotProvider('/nonexistent/legacy', '/nonexistent/ws', globalDir) + const sessions = await provider.discoverSessions() + + expect(sessions).toHaveLength(1) + expect(sessions[0]!.project).toBe('copilot-chat') + expect((sessions[0] as { sourceType?: string }).sourceType).toBe('chatsession') + + const calls: ParsedProviderCall[] = [] + for await (const call of provider.createSessionParser(sessions[0]!, new Set()).parse()) calls.push(call) + expect(calls).toHaveLength(1) + expect(calls[0]!.inputTokens).toBe(32543) + }) + + it('skips chatSessions discovery when an OTel source is present', async () => { + if (!isSqliteAvailable()) return + + vi.unstubAllEnvs() + const dbPath = join(tmpDir, 'agent-traces.db') + vi.stubEnv('CODEBURN_COPILOT_OTEL_DB', dbPath) + vi.stubEnv('CODEBURN_COPILOT_DISABLE_OTEL', '') + createOtelDb(dbPath) + insertSpan(dbPath, { + spanId: 'span-chatsession-skip', + traceId: 'trace-chatsession-skip', + operationName: 'chat', + startTimeMs: 1000, + attrs: { + 'gen_ai.conversation.id': 'conv-chatsession-skip', + 'gen_ai.response.model': 'gpt-4.1', + 'gen_ai.usage.input_tokens': 100, + 'gen_ai.usage.output_tokens': 10, + }, + }) + + const wsDir = join(tmpDir, 'vscode-ws') + const hashDir = join(wsDir, 'abc123') + const workspaceChatSessionsDir = join(hashDir, 'chatSessions') + const globalDir = join(tmpDir, 'globalStorage') + const emptyWindowDir = join(globalDir, 'emptyWindowChatSessions') + await mkdir(workspaceChatSessionsDir, { recursive: true }) + await mkdir(emptyWindowDir, { recursive: true }) + await 
writeFile(join(hashDir, 'workspace.json'), JSON.stringify({ folder: 'file:///home/user/myapp' }))
+    await createChatSessionFile(join(workspaceChatSessionsDir, 'workspace.jsonl'), [
+      { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-workspace', requests: [] } },
+      { kind: 2, k: ['requests'], v: [chatSessionSampleRequest()] },
+    ])
+    await createChatSessionFile(join(emptyWindowDir, 'empty-window.jsonl'), [
+      { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-empty-window', requests: [] } },
+      { kind: 2, k: ['requests'], v: [chatSessionSampleRequest({ requestId: 'request-empty-window' })] },
+    ])
+
+    const provider = createCopilotProvider('/nonexistent/legacy', wsDir, globalDir)
+    const sources = await provider.discoverSessions()
+
+    expect(sources.filter(s => (s as { sourceType?: string }).sourceType === 'otel')).toHaveLength(1)
+    expect(sources.filter(s => (s as { sourceType?: string }).sourceType === 'chatsession')).toHaveLength(0) // both chatSessions files above are expected to be left out
+  })
+
+  it('applies append-then-edit journal updates', async () => {
+    const filePath = join(tmpDir, 'append-edit.jsonl')
+    await createChatSessionFile(filePath, [
+      { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-edit', requests: [] } },
+      { kind: 2, k: ['requests'], v: [chatSessionSampleRequest()] },
+      { kind: 1, k: ['requests', 0, 'result', 'metadata', 'outputTokens'], v: 88 }, // edit targets the request appended on the previous line
+    ])
+
+    const calls = await collectCalls({ path: filePath, project: 'myproject', provider: 'copilot', sourceType: 'chatsession' })
+
+    expect(calls).toHaveLength(1)
+    expect(calls[0]!.outputTokens).toBe(88) // the edited value, not the sample request's original
+  })
+
+  it('deduplicates by requestId across parser runs', async () => {
+    const filePath = join(tmpDir, 'dedupe.jsonl')
+    await createChatSessionFile(filePath, [
+      { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-dedupe', requests: [] } },
+      { kind: 2, k: ['requests'], v: [chatSessionSampleRequest()] }, // explicit append path, like every other kind: 2 entry in this suite
+    ])
+    const source = { path: filePath, project: 'myproject', provider: 'copilot', sourceType: 'chatsession' }
+    const seenKeys = new Set<string>() // one set shared by both runs below
+
+    const calls1 = await collectCalls(source, seenKeys)
+    const calls2 = await collectCalls(source, seenKeys)
+
+    expect(calls1).toHaveLength(1)
+    expect(calls2).toHaveLength(0) // second pass over the same file yields nothing new
+  })
+
+  it('ignores prototype-pollution journal paths without crashing', async () => {
+    const filePath = join(tmpDir, 'proto.jsonl')
+    await createChatSessionFile(filePath, [
+      { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-proto', requests: [] } },
+      { kind: 1, k: ['__proto__', 'polluted'], v: true },
+      { kind: 1, k: ['constructor', 'prototype', 'polluted'], v: true },
+      { kind: 2, k: ['requests'], v: [chatSessionSampleRequest()] },
+    ])
+
+    expect(({} as { polluted?: unknown }).polluted).toBeUndefined() // baseline: a fresh object has no `polluted` before parsing
+    const calls = await collectCalls({ path: filePath, project: 'myproject', provider: 'copilot', sourceType: 'chatsession' })
+
+    expect(calls).toHaveLength(1) // the legitimate append after the hostile entries is still read
+    expect(({} as { polluted?: unknown }).polluted).toBeUndefined() // and a fresh object still has no `polluted` afterwards
+  })
+
+  it('skips legacy transcripts for a workspace hash that has chatSessions', async () => {
+    const wsDir = join(tmpDir, 'vscode-ws')
+    const hashDir = join(wsDir, 'abc123')
+    const chatSessionsDir = join(hashDir, 'chatSessions')
+    const transcriptsDir = join(hashDir, 'GitHub.copilot-chat', 'transcripts')
+    await mkdir(chatSessionsDir, { recursive: true })
+    await mkdir(transcriptsDir, { recursive: true })
+    await writeFile(join(hashDir, 'workspace.json'), JSON.stringify({ folder: 'file:///home/user/myapp' }))
+    await createChatSessionFile(join(chatSessionsDir, 'chat.jsonl'), [
+      { kind: 0, v: { version: 3, creationDate: 1780157113020, sessionId: 'chat-modern', requests: [] } },
+      { kind: 2, k: ['requests'], v: [chatSessionSampleRequest()] },
+    ])
+    await writeFile(join(transcriptsDir, 'legacy.jsonl'), transcriptSessionStart('legacy') + '\n')
+
+    const provider = createCopilotProvider('/nonexistent/legacy', wsDir, '/nonexistent/global')
+    const sessions = await provider.discoverSessions()
+
+    expect(sessions).toHaveLength(1) // two files were written under abc123; only one is reported
+    expect((sessions[0] as { sourceType?: string }).sourceType).toBe('chatsession')
+    expect(sessions[0]!.path).toContain(join('abc123', 'chatSessions'))
+  })
+})
+
 describe('copilot provider - discoverSessions', () => {
   beforeEach(async () => {
     tmpDir = await mkdtemp(join(tmpdir(), 'copilot-test-'))
@@ -450,6 +653,18 @@ describe('copilot provider - discoverSessions', () => {
       posix.join('/home/test', '.config', 'VSCodium', 'User', 'workspaceStorage'),
     )
   })
+
+  it('includes VSCodium globalStorage paths on all supported platforms', () => {
+    expect(getVSCodeGlobalStorageDirs('/Users/test', 'darwin')).toContain(
+      posix.join('/Users/test', 'Library', 'Application Support', 'VSCodium', 'User', 'globalStorage'),
+    )
+    expect(getVSCodeGlobalStorageDirs('C:\\Users\\test', 'win32')).toContain(
+      win32.join('C:\\Users\\test', 'AppData', 'Roaming', 'VSCodium', 'User', 'globalStorage'),
+    )
+    expect(getVSCodeGlobalStorageDirs('/home/test', 'linux')).toContain(
+      posix.join('/home/test', '.config', 'VSCodium', 'User', 'globalStorage'),
+    )
+  })
 })
 
 describe('copilot provider - metadata', () => {