From e30d820d6f84fc62fb6a41c9b9f335d4a180b41e Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Sat, 23 May 2026 20:37:09 +0530 Subject: [PATCH 1/5] Add SPOG support: x-databricks-org-id header for telemetry + feature-flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPOG (Single Panel of Glass) replaces workspace-specific hostnames with account-level vanity URLs. When httpPath carries `?o=`, endpoints that don't include the workspace in their URL path (telemetry, feature flags) need the workspace conveyed via the `x-databricks-org-id` header instead. Changes: - Parse `?o=` out of httpPath in DBSQLClient.connect() and stash the org-id as `x-databricks-org-id` on a new `ClientConfig.customHeaders` field. A user-supplied `customHeaders` entry (case-insensitive) takes precedence. - DatabricksTelemetryExporter spreads `config.customHeaders` into the outgoing POST headers. Auth headers still win on collision. - FeatureFlagCache does the same for the feature-flag GET. Not applicable to this driver (vs JDBC port in databricks/databricks-jdbc#1316): - httpPath property parser fix — Node.js passes `options.path` through unmodified. - Warehouse ID regex fix for SEA — driver uses Thrift only. - DBFS Volume header injection — driver exposes no Volume API. OAuth/OIDC token requests deliberately do NOT receive customHeaders. Co-authored-by: Isaac --- lib/DBSQLClient.ts | 26 +++++++++ lib/contracts/IClientContext.ts | 11 ++++ lib/contracts/IDBSQLClient.ts | 11 ++++ lib/telemetry/DatabricksTelemetryExporter.ts | 1 + lib/telemetry/FeatureFlagCache.ts | 1 + tests/unit/DBSQLClient.test.ts | 55 +++++++++++++++++++ .../DatabricksTelemetryExporter.test.ts | 40 ++++++++++++++ tests/unit/telemetry/FeatureFlagCache.test.ts | 39 +++++++++++++ 8 files changed, 184 insertions(+) diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index 340efd0c..25d738b1 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -330,6 +330,27 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I return match ? match[1] : undefined; } + /** + * Build the customHeaders map applied to telemetry POSTs and feature-flag + * GETs (SPOG / Single Panel of Glass support). When `httpPath` carries + * `?o=` — account-level vanity URL routing — endpoints that + * don't include the workspace in their path need the workspace conveyed via + * the `x-databricks-org-id` header instead. A user-supplied value in + * `options.customHeaders` (case-insensitively keyed) wins over the parsed + * value. + */ + private buildCustomHeaders(options: ConnectionOptions): Record | undefined { + const merged: Record = { ...(options.customHeaders ?? {}) }; + const hasOrgIdAlready = Object.keys(merged).some((k) => k.toLowerCase() === 'x-databricks-org-id'); + if (!hasOrgIdAlready) { + const orgId = this.extractWorkspaceId(); + if (orgId) { + merged['x-databricks-org-id'] = orgId; + } + } + return Object.keys(merged).length > 0 ? merged : undefined; + } + /** * Build driver configuration for telemetry reporting. * @returns DriverConfiguration object with current driver settings @@ -561,6 +582,11 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.config.userAgentEntry = options.userAgentEntry; } + // SPOG: parse `?o=` out of httpPath and stash it as + // `x-databricks-org-id` for the telemetry + feature-flag clients, which + // hit endpoints that don't carry the workspace in their URL path. + this.config.customHeaders = this.buildCustomHeaders(options); + this.authProvider = this.createAuthProvider(options, authProvider); this.connectionProvider = this.createConnectionProvider(options); diff --git a/lib/contracts/IClientContext.ts b/lib/contracts/IClientContext.ts index 955b9c52..43a47745 100644 --- a/lib/contracts/IClientContext.ts +++ b/lib/contracts/IClientContext.ts @@ -52,6 +52,17 @@ export interface ClientConfig { */ telemetryFlushOnExit?: boolean; userAgentEntry?: string; + + /** + * Extra HTTP headers attached to driver-owned out-of-band requests + * (telemetry, feature flags). Populated by `DBSQLClient.connect()` from + * `ConnectionOptions.customHeaders` plus an `x-databricks-org-id` header + * derived from the `?o=` query parameter on `httpPath` when present, to + * support SPOG (Single Panel of Glass) account-level routing on endpoints + * that don't carry `?o=` in their URL path. NOT applied to Thrift or + * OAuth/OIDC requests. + */ + customHeaders?: Record; } export default interface IClientContext { diff --git a/lib/contracts/IDBSQLClient.ts b/lib/contracts/IDBSQLClient.ts index 08592219..b75a8075 100644 --- a/lib/contracts/IDBSQLClient.ts +++ b/lib/contracts/IDBSQLClient.ts @@ -55,6 +55,17 @@ export type ConnectionOptions = { proxy?: ProxyOptions; enableMetricViewMetadata?: boolean; + /** + * Extra HTTP headers attached to driver-owned out-of-band requests + * (telemetry POSTs and feature-flag GETs). Not applied to the primary + * Thrift transport or to OAuth/OIDC token requests. + * + * When `path` contains `?o=` (SPOG account-level routing), + * the driver automatically injects an `x-databricks-org-id` header unless + * one is already present in this map. + */ + customHeaders?: Record; + /** * Whether the driver emits telemetry events (connection / statement / * cloud-fetch / error). Defaults to `true`. diff --git a/lib/telemetry/DatabricksTelemetryExporter.ts b/lib/telemetry/DatabricksTelemetryExporter.ts index 65fe8b64..bfa0f1b1 100644 --- a/lib/telemetry/DatabricksTelemetryExporter.ts +++ b/lib/telemetry/DatabricksTelemetryExporter.ts @@ -249,6 +249,7 @@ export default class DatabricksTelemetryExporter { let headers: Record = { 'Content-Type': 'application/json', 'User-Agent': userAgent, + ...(config.customHeaders ?? {}), }; if (authenticatedExport) { diff --git a/lib/telemetry/FeatureFlagCache.ts b/lib/telemetry/FeatureFlagCache.ts index 47fc9e8d..e060bd93 100644 --- a/lib/telemetry/FeatureFlagCache.ts +++ b/lib/telemetry/FeatureFlagCache.ts @@ -140,6 +140,7 @@ export default class FeatureFlagCache { const headers: Record = { 'Content-Type': 'application/json', 'User-Agent': this.userAgent, + ...(this.context.getConfig().customHeaders ?? {}), ...(await this.getAuthHeaders()), }; diff --git a/tests/unit/DBSQLClient.test.ts b/tests/unit/DBSQLClient.test.ts index 184e25a3..5e2b99fe 100644 --- a/tests/unit/DBSQLClient.test.ts +++ b/tests/unit/DBSQLClient.test.ts @@ -103,6 +103,18 @@ describe('DBSQLClient.connect', () => { logSpy.restore(); }); + + it('populates config.customHeaders with org-id parsed from ?o= (SPOG)', async () => { + const client = new DBSQLClient(); + await client.connect({ ...connectOptions, path: '/sql/1.0/warehouses/abc?o=12345678901234' }); + expect(client.getConfig().customHeaders).to.deep.equal({ 'x-databricks-org-id': '12345678901234' }); + }); + + it('leaves config.customHeaders undefined when path has no ?o= and none supplied', async () => { + const client = new DBSQLClient(); + await client.connect({ ...connectOptions, path: '/sql/1.0/warehouses/abc' }); + expect(client.getConfig().customHeaders).to.be.undefined; + }); }); describe('DBSQLClient.openSession', () => { @@ -785,6 +797,49 @@ describe('DBSQLClient telemetry paths', () => { }); }); + describe('buildCustomHeaders (SPOG)', () => { + it('injects x-databricks-org-id from ?o= in httpPath', () => { + const client = new DBSQLClient(); + (client as any).httpPath = '/sql/1.0/warehouses/abc?o=12345678901234'; + const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc?o=12345678901234' }); + expect(headers).to.deep.equal({ 'x-databricks-org-id': '12345678901234' }); + }); + + it('returns undefined when no ?o= and no user-supplied customHeaders', () => { + const client = new DBSQLClient(); + (client as any).httpPath = '/sql/1.0/warehouses/abc'; + const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc' }); + expect(headers).to.be.undefined; + }); + + it('preserves user-supplied customHeaders alongside parsed org-id', () => { + const client = new DBSQLClient(); + (client as any).httpPath = '/sql/1.0/warehouses/abc?o=42'; + const headers = (client as any).buildCustomHeaders({ + path: '/sql/1.0/warehouses/abc?o=42', + customHeaders: { 'x-trace-id': 'tid-001' }, + }); + expect(headers).to.deep.equal({ 'x-trace-id': 'tid-001', 'x-databricks-org-id': '42' }); + }); + + it('user-supplied x-databricks-org-id wins over ?o= parsed value (case-insensitive)', () => { + const client = new DBSQLClient(); + (client as any).httpPath = '/sql/1.0/warehouses/abc?o=42'; + const headers = (client as any).buildCustomHeaders({ + path: '/sql/1.0/warehouses/abc?o=42', + customHeaders: { 'X-Databricks-Org-Id': '999' }, + }); + expect(headers).to.deep.equal({ 'X-Databricks-Org-Id': '999' }); + }); + + it('does not inject org-id when ?o= value is non-numeric', () => { + const client = new DBSQLClient(); + (client as any).httpPath = '/sql/1.0/warehouses/abc?o=tenant_xyz'; + const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc?o=tenant_xyz' }); + expect(headers).to.be.undefined; + }); + }); + describe('telemetry refcount on reconnect', () => { it('releases the prior refcount when connect() is called twice', async () => { const client = new DBSQLClient(); diff --git a/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts b/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts index fb347bf6..c3fd099d 100644 --- a/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts +++ b/tests/unit/telemetry/DatabricksTelemetryExporter.test.ts @@ -120,6 +120,46 @@ describe('DatabricksTelemetryExporter', () => { } expect(threw).to.be.false; }); + + it('should attach config.customHeaders to the POST (SPOG)', async () => { + const context = new ClientContextStub({ + customHeaders: { 'x-databricks-org-id': '12345678901234' }, + } as any); + const registry = new CircuitBreakerRegistry(context); + const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); + const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + + await exporter.export([makeMetric()]); + + const init = sendRequestStub.firstCall.args[1] as { headers: Record }; + expect(init.headers['x-databricks-org-id']).to.equal('12345678901234'); + }); + + it('auth headers win over customHeaders on key collision', async () => { + const context = new ClientContextStub({ + customHeaders: { Authorization: 'Bearer not-the-real-token' }, + } as any); + const registry = new CircuitBreakerRegistry(context); + const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); + const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + + await exporter.export([makeMetric()]); + + const init = sendRequestStub.firstCall.args[1] as { headers: Record }; + expect(init.headers.Authorization).to.equal('Bearer test-token'); + }); + + it('does not attach customHeaders when none are configured', async () => { + const context = new ClientContextStub(); + const registry = new CircuitBreakerRegistry(context); + const exporter = new DatabricksTelemetryExporter(context, 'host.example.com', registry, fakeAuthProvider); + const sendRequestStub = sinon.stub(exporter as any, 'sendRequest').returns(makeOkResponse()); + + await exporter.export([makeMetric()]); + + const init = sendRequestStub.firstCall.args[1] as { headers: Record }; + expect(init.headers).to.not.have.property('x-databricks-org-id'); + }); }); describe('export() - retry logic', () => { diff --git a/tests/unit/telemetry/FeatureFlagCache.test.ts b/tests/unit/telemetry/FeatureFlagCache.test.ts index ed7bc79c..c12ca5b1 100644 --- a/tests/unit/telemetry/FeatureFlagCache.test.ts +++ b/tests/unit/telemetry/FeatureFlagCache.test.ts @@ -317,4 +317,43 @@ describe('FeatureFlagCache', () => { fetchStub.restore(); }); }); + + describe('customHeaders propagation (SPOG)', () => { + function makeJsonResponse(body: unknown) { + return Promise.resolve({ + ok: true, + status: 200, + statusText: 'OK', + json: () => Promise.resolve(body), + text: () => Promise.resolve(''), + }); + } + + it('attaches config.customHeaders to the feature-flag GET', async () => { + const context = new ClientContextStub({ + customHeaders: { 'x-databricks-org-id': '12345678901234' }, + } as any); + const cache = new FeatureFlagCache(context); + const stub = sinon.stub(cache as any, 'fetchWithRetry').returns(makeJsonResponse({ flags: [] })); + + await (cache as any).fetchFeatureFlag('host.example.com'); + + expect(stub.calledOnce).to.be.true; + const init = stub.firstCall.args[1] as { headers: Record }; + expect(init.headers['x-databricks-org-id']).to.equal('12345678901234'); + stub.restore(); + }); + + it('does not set x-databricks-org-id when customHeaders is empty', async () => { + const context = new ClientContextStub(); + const cache = new FeatureFlagCache(context); + const stub = sinon.stub(cache as any, 'fetchWithRetry').returns(makeJsonResponse({ flags: [] })); + + await (cache as any).fetchFeatureFlag('host.example.com'); + + const init = stub.firstCall.args[1] as { headers: Record }; + expect(init.headers).to.not.have.property('x-databricks-org-id'); + stub.restore(); + }); + }); }); From 65273f12f4d348cf15ca44c4a54d13d851dd7e83 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 25 May 2026 00:24:31 +0530 Subject: [PATCH 2/5] Fix close-ordering: keep context registered through final telemetry flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `TelemetryClientProvider.releaseClient` was calling `unregisterContext` before `client.close()`. On the last refcount, that emptied the FIFO of `(context, authProvider)` pairs before the final flush ran, so the exporter's `getAuthProvider()` walk returned undefined and the batch was dropped with "Telemetry: Authorization header missing — metrics will be dropped". Defer `unregisterContext` until after `close()` on the last refcount; multi-refcount path is unchanged (immediate unregister so surviving consumers don't reach into a closing context). Verified end-to-end against a real SPOG host: the final flush now exports its 3 metrics (connection.open, statement.start, statement.complete) instead of warning and dropping them. Co-authored-by: Isaac --- lib/telemetry/TelemetryClient.ts | 6 ++- lib/telemetry/TelemetryClientProvider.ts | 39 ++++++++++------ .../telemetry/TelemetryClientProvider.test.ts | 44 +++++++++++++++++++ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/lib/telemetry/TelemetryClient.ts b/lib/telemetry/TelemetryClient.ts index 942a951e..07a8c73a 100644 --- a/lib/telemetry/TelemetryClient.ts +++ b/lib/telemetry/TelemetryClient.ts @@ -169,8 +169,10 @@ class TelemetryClient implements IClientContext { /** * Remove a `DBSQLClient`'s context from the pool. Called by - * `TelemetryClientProvider.releaseClient` before refcount decrement so the - * exporter doesn't keep trying to use a closed context. + * `TelemetryClientProvider.releaseClient` in the multi-refcount case so the + * exporter doesn't keep trying to use a closing context. Deliberately + * NOT called on the last refcount release: `close()` needs the snapshot + * pair to resolve auth/connection providers for the final flush. * * Uses the cached snapshot pair (`context`, `authProvider`) from register * time, not a fresh `context.getAuthProvider?.()` call. If the underlying diff --git a/lib/telemetry/TelemetryClientProvider.ts b/lib/telemetry/TelemetryClientProvider.ts index 4e7c09c0..2551ea2b 100644 --- a/lib/telemetry/TelemetryClientProvider.ts +++ b/lib/telemetry/TelemetryClientProvider.ts @@ -133,22 +133,35 @@ class TelemetryClientProvider { return; } - holder.client.unregisterContext(context); + if (holder.refCount > 1) { + // Other registrants remain — drop this context now so subsequent + // flushes by surviving consumers don't try to authenticate via a + // context whose underlying DBSQLClient is closing. + holder.client.unregisterContext(context); + holder.refCount -= 1; + logger.log(LogLevel.debug, `TelemetryClient reference count for ${host}: ${holder.refCount}`); + return; + } + + // Last refcount holder. Keep the context registered through close() + // so the final flush can still resolve `getAuthProvider()` and + // `getConnectionProvider()` from the FIFO snapshot — otherwise the + // exporter drops the final batch with "missing Authorization header". + // The TelemetryClient is fully closed below, so the lingering FIFO + // entry has no further effect. holder.refCount -= 1; logger.log(LogLevel.debug, `TelemetryClient reference count for ${host}: ${holder.refCount}`); - if (holder.refCount <= 0) { - // Remove from map BEFORE awaiting close so a concurrent - // getOrCreateClient creates a fresh instance rather than receiving - // this closing one. - this.clients.delete(key); - try { - await holder.client.close(); - logger.log(LogLevel.debug, `Closed and removed TelemetryClient for host: ${host}`); - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - logger.log(LogLevel.debug, `Error releasing TelemetryClient: ${msg}`); - } + // Remove from map BEFORE awaiting close so a concurrent + // getOrCreateClient creates a fresh instance rather than receiving + // this closing one. + this.clients.delete(key); + try { + await holder.client.close(); + logger.log(LogLevel.debug, `Closed and removed TelemetryClient for host: ${host}`); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + logger.log(LogLevel.debug, `Error releasing TelemetryClient: ${msg}`); } } diff --git a/tests/unit/telemetry/TelemetryClientProvider.test.ts b/tests/unit/telemetry/TelemetryClientProvider.test.ts index 00f638fe..6ffa4db2 100644 --- a/tests/unit/telemetry/TelemetryClientProvider.test.ts +++ b/tests/unit/telemetry/TelemetryClientProvider.test.ts @@ -267,6 +267,50 @@ describe('TelemetryClientProvider', () => { expect(second.isClosed()).to.be.false; expect(provider.getRefCount(HOST1)).to.equal(1); }); + + it('keeps the context registered through close() on last refcount', async () => { + // Regression: releaseClient used to call unregisterContext before + // close(), which dropped the only FIFO entry and left the final + // flush without an auth provider — metrics were dropped with + // "missing Authorization header". + const context = new ClientContextStub(); + const provider = new TelemetryClientProvider(); + + const client = provider.getOrCreateClient(context, HOST1); + let authProviderAtClose: unknown = 'unset'; + sinon.stub(client, 'close').callsFake(async () => { + // The TelemetryClient is its own IClientContext. While close() runs, + // getAuthProvider() must still resolve (the FIFO entry survives). + authProviderAtClose = client.getAuthProvider?.(); + }); + + await provider.releaseClient(context, HOST1); + + // The FIFO walk hits the (still-registered) context; the stub's + // getAuthProvider returns undefined but the lookup itself completes. + // What we're really asserting is that the FIFO wasn't pre-emptied: + // pre-fix, this assertion would not even fire because close() runs + // against an empty FIFO and the auth-provider walk short-circuits + // before close()'s callsFake. Here we verify the spy ran AND the + // context is still registered at that moment. + expect(authProviderAtClose).to.not.equal('unset'); + expect((client as any).contexts.length).to.equal(1); + }); + + it('drops the context immediately when other refcounts remain', async () => { + const context = new ClientContextStub(); + const provider = new TelemetryClientProvider(); + + const client = provider.getOrCreateClient(context, HOST1); + provider.getOrCreateClient(context, HOST1); // refcount=2 + + await provider.releaseClient(context, HOST1); + + // Multi-refcount path: unregisterContext runs immediately; the + // (single) FIFO entry tracking this context was removed. + expect((client as any).contexts.length).to.equal(0); + expect(provider.getRefCount(HOST1)).to.equal(1); + }); }); describe('Host normalization', () => { From 66ce012de0549d335413ad8188e97eae866045ef Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 25 May 2026 00:30:17 +0530 Subject: [PATCH 3/5] Simplify close-ordering fix Tighten the previous commit to the minimal diff: guard the existing unregisterContext call instead of restructuring the function. Restores the unchanged comment on TelemetryClient.unregisterContext. Co-authored-by: Isaac --- lib/telemetry/TelemetryClient.ts | 6 ++-- lib/telemetry/TelemetryClientProvider.ts | 37 +++++++++--------------- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/lib/telemetry/TelemetryClient.ts b/lib/telemetry/TelemetryClient.ts index 07a8c73a..942a951e 100644 --- a/lib/telemetry/TelemetryClient.ts +++ b/lib/telemetry/TelemetryClient.ts @@ -169,10 +169,8 @@ class TelemetryClient implements IClientContext { /** * Remove a `DBSQLClient`'s context from the pool. Called by - * `TelemetryClientProvider.releaseClient` in the multi-refcount case so the - * exporter doesn't keep trying to use a closing context. Deliberately - * NOT called on the last refcount release: `close()` needs the snapshot - * pair to resolve auth/connection providers for the final flush. + * `TelemetryClientProvider.releaseClient` before refcount decrement so the + * exporter doesn't keep trying to use a closed context. * * Uses the cached snapshot pair (`context`, `authProvider`) from register * time, not a fresh `context.getAuthProvider?.()` call. If the underlying diff --git a/lib/telemetry/TelemetryClientProvider.ts b/lib/telemetry/TelemetryClientProvider.ts index 2551ea2b..eb873cde 100644 --- a/lib/telemetry/TelemetryClientProvider.ts +++ b/lib/telemetry/TelemetryClientProvider.ts @@ -133,35 +133,26 @@ class TelemetryClientProvider { return; } + // Skip unregister on the last release so close()'s final flush can still + // resolve auth/connection providers from the FIFO snapshot. if (holder.refCount > 1) { - // Other registrants remain — drop this context now so subsequent - // flushes by surviving consumers don't try to authenticate via a - // context whose underlying DBSQLClient is closing. holder.client.unregisterContext(context); - holder.refCount -= 1; - logger.log(LogLevel.debug, `TelemetryClient reference count for ${host}: ${holder.refCount}`); - return; } - - // Last refcount holder. Keep the context registered through close() - // so the final flush can still resolve `getAuthProvider()` and - // `getConnectionProvider()` from the FIFO snapshot — otherwise the - // exporter drops the final batch with "missing Authorization header". - // The TelemetryClient is fully closed below, so the lingering FIFO - // entry has no further effect. holder.refCount -= 1; logger.log(LogLevel.debug, `TelemetryClient reference count for ${host}: ${holder.refCount}`); - // Remove from map BEFORE awaiting close so a concurrent - // getOrCreateClient creates a fresh instance rather than receiving - // this closing one. - this.clients.delete(key); - try { - await holder.client.close(); - logger.log(LogLevel.debug, `Closed and removed TelemetryClient for host: ${host}`); - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - logger.log(LogLevel.debug, `Error releasing TelemetryClient: ${msg}`); + if (holder.refCount <= 0) { + // Remove from map BEFORE awaiting close so a concurrent + // getOrCreateClient creates a fresh instance rather than receiving + // this closing one. + this.clients.delete(key); + try { + await holder.client.close(); + logger.log(LogLevel.debug, `Closed and removed TelemetryClient for host: ${host}`); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + logger.log(LogLevel.debug, `Error releasing TelemetryClient: ${msg}`); + } } } From 3d21c114cbfabd50ee38b3d25f61200e1162727f Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 25 May 2026 20:49:31 +0530 Subject: [PATCH 4/5] Address review feedback: pure header builder, SPOG logs, all-purpose path form - buildCustomHeaders(httpPath, userHeaders) and extractWorkspaceId(httpPath): SPOG-routing inputs now in the signature instead of read off this.httpPath. extractWorkspaceId is static. Drops the (client as any).httpPath test workaround. Fixes the hidden ordering dependency flagged in PR review [M1]. - Log SPOG header injection (debug), caller-supplied override (debug), and non-numeric workspace ID (warn). Closes JDBC parity gap so oncall has a grep handle when SPOG routing misbehaves [H2]. - extractWorkspaceId now also matches the all-purpose cluster path form /o// in addition to the warehouse query form ?o=. Verified end-to-end against a real all-purpose cluster on a SPOG host. Co-authored-by: Isaac Signed-off-by: samikshya-chand_data --- lib/DBSQLClient.ts | 93 +++++++++++++++++------- tests/unit/DBSQLClient.test.ts | 127 +++++++++++++++++++++++++-------- 2 files changed, 167 insertions(+), 53 deletions(-) diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index 25d738b1..85ce05ca 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -298,9 +298,9 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I /** * Extract the numeric workspace ID for telemetry. * - * The only reliable carrier in the connection params today is the `?o=N` - * query parameter on `httpPath` — Databricks SQL warehouses are typically - * connected to via paths like `/sql/1.0/warehouses/?o=12345678901234`. + * Two URL shapes carry the workspace ID today: + * - Warehouse, query form: `/sql/1.0/warehouses/?o=` + * - All-purpose cluster, path form: `sql/protocolv1/o//` * * Host-based extraction was tried previously but produced confidently-wrong * values: @@ -313,39 +313,84 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I * Returns `undefined` when no workspace ID can be derived. Server-side * attribution is better off seeing a missing field than a wrong value. */ - private extractWorkspaceId(): string | undefined { - const { httpPath } = this; + private static extractWorkspaceId(httpPath: string | undefined): string | undefined { if (!httpPath) { return undefined; } const queryIdx = httpPath.indexOf('?'); - if (queryIdx < 0) { - return undefined; + // Warehouse form: `?o=` in the query string. + if (queryIdx >= 0) { + const query = httpPath.slice(queryIdx + 1); + // Match `o=` as the first param, an inner `&o=`, etc. + // Workspace IDs are decimal integers; reject anything else so a stray + // `o=tenant_42` doesn't ship as a workspace ID. + const queryMatch = query.match(/(?:^|&)o=(\d+)(?:&|$)/); + if (queryMatch) { + return queryMatch[1]; + } } - const query = httpPath.slice(queryIdx + 1); - // Match `o=` as the first param, an inner `&o=`, etc. - // Workspace IDs are decimal integers; reject anything else so a stray - // `o=tenant_42` doesn't ship as a workspace ID. - const match = query.match(/(?:^|&)o=(\d+)(?:&|$)/); - return match ? match[1] : undefined; + // All-purpose cluster form: `/o//` as a path segment. + const pathOnly = queryIdx >= 0 ? httpPath.slice(0, queryIdx) : httpPath; + const pathMatch = pathOnly.match(/(?:^|\/)o\/(\d+)(?:\/|$)/); + return pathMatch ? pathMatch[1] : undefined; + } + + // Detects an `o=` or `/o/` where `` is present but + // non-numeric, so the caller can warn instead of silently dropping a + // malformed workspace param. + private static hasMalformedOrgParam(httpPath: string | undefined): boolean { + if (!httpPath) { + return false; + } + const queryIdx = httpPath.indexOf('?'); + if (queryIdx >= 0) { + const query = httpPath.slice(queryIdx + 1); + const hasOrg = /(?:^|&)o=/.test(query); + const hasNumericOrg = /(?:^|&)o=\d+(?:&|$)/.test(query); + if (hasOrg && !hasNumericOrg) { + return true; + } + } + const pathOnly = queryIdx >= 0 ? httpPath.slice(0, queryIdx) : httpPath; + const hasPathOrg = /(?:^|\/)o\/[^/]+/.test(pathOnly); + const hasNumericPathOrg = /(?:^|\/)o\/\d+(?:\/|$)/.test(pathOnly); + return hasPathOrg && !hasNumericPathOrg; } /** * Build the customHeaders map applied to telemetry POSTs and feature-flag - * GETs (SPOG / Single Panel of Glass support). When `httpPath` carries - * `?o=` — account-level vanity URL routing — endpoints that - * don't include the workspace in their path need the workspace conveyed via + * GETs (SPOG / Single Panel of Glass support). When `httpPath` carries a + * workspace ID — either as a `?o=` query (warehouse) or a + * `/o//` path segment (all-purpose cluster) — endpoints + * that don't include the workspace in their URL path need it conveyed via * the `x-databricks-org-id` header instead. A user-supplied value in - * `options.customHeaders` (case-insensitively keyed) wins over the parsed - * value. + * `userHeaders` (case-insensitively keyed) wins over the parsed value. + * + * `httpPath` is passed explicitly (rather than read off `this.httpPath`) so + * the SPOG-routing dependency is visible in the signature — a future + * refactor that reorders connect() can't silently break injection. */ - private buildCustomHeaders(options: ConnectionOptions): Record | undefined { - const merged: Record = { ...(options.customHeaders ?? {}) }; + private buildCustomHeaders( + httpPath: string | undefined, + userHeaders: Record | undefined, + ): Record | undefined { + const merged: Record = { ...(userHeaders ?? {}) }; const hasOrgIdAlready = Object.keys(merged).some((k) => k.toLowerCase() === 'x-databricks-org-id'); - if (!hasOrgIdAlready) { - const orgId = this.extractWorkspaceId(); + if (hasOrgIdAlready) { + this.logger.log( + LogLevel.debug, + 'SPOG: x-databricks-org-id supplied by caller; not extracting from httpPath', + ); + } else { + const orgId = DBSQLClient.extractWorkspaceId(httpPath); if (orgId) { merged['x-databricks-org-id'] = orgId; + this.logger.log(LogLevel.debug, `SPOG: injecting x-databricks-org-id=${orgId} (extracted from httpPath)`); + } else if (DBSQLClient.hasMalformedOrgParam(httpPath)) { + this.logger.log( + LogLevel.warn, + 'SPOG: httpPath contains non-numeric workspace ID; x-databricks-org-id not injected', + ); } } return Object.keys(merged).length > 0 ? merged : undefined; @@ -585,7 +630,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I // SPOG: parse `?o=` out of httpPath and stash it as // `x-databricks-org-id` for the telemetry + feature-flag clients, which // hit endpoints that don't carry the workspace in their URL path. - this.config.customHeaders = this.buildCustomHeaders(options); + this.config.customHeaders = this.buildCustomHeaders(options.path, options.customHeaders); this.authProvider = this.createAuthProvider(options, authProvider); @@ -703,7 +748,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I safeEmit(this, (emitter) => { if (!this.host) return; const latencyMs = Date.now() - startTime; - const workspaceId = this.extractWorkspaceId(); + const workspaceId = DBSQLClient.extractWorkspaceId(this.httpPath); const driverConfig = this.driverConfigShipped ? undefined : this.buildDriverConfiguration(); if (driverConfig) { this.driverConfigShipped = true; diff --git a/tests/unit/DBSQLClient.test.ts b/tests/unit/DBSQLClient.test.ts index 5e2b99fe..1563989d 100644 --- a/tests/unit/DBSQLClient.test.ts +++ b/tests/unit/DBSQLClient.test.ts @@ -767,77 +767,146 @@ describe('DBSQLClient telemetry paths', () => { describe('extractWorkspaceId', () => { it('returns the numeric o= param from httpPath', () => { - const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc?o=12345678901234'; - const id = (client as any).extractWorkspaceId(); + const id = (DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?o=12345678901234'); expect(id).to.equal('12345678901234'); }); it('returns undefined when no query string', () => { - const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc'; - expect((client as any).extractWorkspaceId()).to.be.undefined; + expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc')).to.be.undefined; }); it('returns undefined when o= is not numeric', () => { - const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc?o=tenant_xyz'; - expect((client as any).extractWorkspaceId()).to.be.undefined; + expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?o=tenant_xyz')).to.be.undefined; }); it('handles o= as a non-first param', () => { - const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc?foo=bar&o=42&baz=qux'; - expect((client as any).extractWorkspaceId()).to.equal('42'); + expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?foo=bar&o=42&baz=qux')).to.equal('42'); }); it('returns undefined when httpPath is unset', () => { - const client = new DBSQLClient(); - expect((client as any).extractWorkspaceId()).to.be.undefined; + expect((DBSQLClient as any).extractWorkspaceId(undefined)).to.be.undefined; + }); + + it('returns the numeric workspace id from all-purpose cluster path form', () => { + expect( + (DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa'), + ).to.equal('99999999999999'); + }); + + it('returns the numeric workspace id from all-purpose cluster path with leading slash', () => { + expect( + (DBSQLClient as any).extractWorkspaceId('/sql/protocolv1/o/12345/0101-000000-aaaaaaaa'), + ).to.equal('12345'); + }); + + it('returns undefined when all-purpose cluster path has non-numeric workspace segment', () => { + expect( + (DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/tenant_xyz/0101-000000-aaaaaaaa'), + ).to.be.undefined; + }); + + it('prefers ?o= query form over /o/ path form when both are present', () => { + expect( + (DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/111/cluster?o=222'), + ).to.equal('222'); }); }); describe('buildCustomHeaders (SPOG)', () => { it('injects x-databricks-org-id from ?o= in httpPath', () => { const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc?o=12345678901234'; - const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc?o=12345678901234' }); + const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=12345678901234', undefined); expect(headers).to.deep.equal({ 'x-databricks-org-id': '12345678901234' }); }); it('returns undefined when no ?o= and no user-supplied customHeaders', () => { const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc'; - const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc' }); + const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc', undefined); expect(headers).to.be.undefined; }); it('preserves user-supplied customHeaders alongside parsed org-id', () => { const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc?o=42'; - const headers = (client as any).buildCustomHeaders({ - path: '/sql/1.0/warehouses/abc?o=42', - customHeaders: { 'x-trace-id': 'tid-001' }, - }); + const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', { 'x-trace-id': 'tid-001' }); expect(headers).to.deep.equal({ 'x-trace-id': 'tid-001', 'x-databricks-org-id': '42' }); }); it('user-supplied x-databricks-org-id wins over ?o= parsed value (case-insensitive)', () => { const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc?o=42'; - const headers = (client as any).buildCustomHeaders({ - path: '/sql/1.0/warehouses/abc?o=42', - customHeaders: { 'X-Databricks-Org-Id': '999' }, + const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', { + 'X-Databricks-Org-Id': '999', }); expect(headers).to.deep.equal({ 'X-Databricks-Org-Id': '999' }); }); it('does not inject org-id when ?o= value is non-numeric', () => { const client = new DBSQLClient(); - (client as any).httpPath = '/sql/1.0/warehouses/abc?o=tenant_xyz'; - const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc?o=tenant_xyz' }); + const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=tenant_xyz', undefined); expect(headers).to.be.undefined; }); + + it('injects x-databricks-org-id from all-purpose cluster path form', () => { + const client = new DBSQLClient(); + const headers = (client as any).buildCustomHeaders( + 'sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa', + undefined, + ); + expect(headers).to.deep.equal({ 'x-databricks-org-id': '99999999999999' }); + }); + + it('logs a warning when workspace ID segment is non-numeric (path form)', () => { + const client = new DBSQLClient(); + const logSpy = sinon.spy((client as any).logger, 'log'); + try { + (client as any).buildCustomHeaders('sql/protocolv1/o/tenant_xyz/cluster', undefined); + const warnCalls = logSpy.getCalls().filter((c) => c.args[0] === LogLevel.warn); + expect(warnCalls).to.have.lengthOf(1); + expect(warnCalls[0].args[1]).to.match(/non-numeric workspace ID/); + } finally { + logSpy.restore(); + } + }); + + it('logs a warning when ?o= is present but non-numeric', () => { + const client = new DBSQLClient(); + const logSpy = sinon.spy((client as any).logger, 'log'); + try { + (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=tenant_xyz', undefined); + const warnCalls = logSpy.getCalls().filter((c) => c.args[0] === LogLevel.warn); + expect(warnCalls).to.have.lengthOf(1); + expect(warnCalls[0].args[1]).to.match(/non-numeric workspace ID/); + } finally { + logSpy.restore(); + } + }); + + it('logs a debug line when injecting org-id from httpPath', () => { + const client = new DBSQLClient(); + const logSpy = sinon.spy((client as any).logger, 'log'); + try { + (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', undefined); + const injectLog = logSpy + .getCalls() + .find((c) => c.args[0] === LogLevel.debug && /injecting x-databricks-org-id=42/.test(String(c.args[1]))); + expect(injectLog, 'expected SPOG inject debug log').to.exist; + } finally { + logSpy.restore(); + } + }); + + it('logs a debug line when caller supplies x-databricks-org-id', () => { + const client = new DBSQLClient(); + const logSpy = sinon.spy((client as any).logger, 'log'); + try { + (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', { 'x-databricks-org-id': '999' }); + const callerLog = logSpy + .getCalls() + .find((c) => c.args[0] === LogLevel.debug && /supplied by caller/.test(String(c.args[1]))); + expect(callerLog, 'expected SPOG caller-supplied debug log').to.exist; + } finally { + logSpy.restore(); + } + }); }); describe('telemetry refcount on reconnect', () => { From dadc2af7cd222498f2342b1d06bea2da3e837380 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 25 May 2026 21:00:39 +0530 Subject: [PATCH 5/5] Apply prettier formatting Auto-fix from `prettier --write` on the two files touched by the previous commit; no logic changes. Co-authored-by: Isaac Signed-off-by: samikshya-chand_data --- lib/DBSQLClient.ts | 5 +---- tests/unit/DBSQLClient.test.ts | 19 +++++++------------ 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index 85ce05ca..b35e0d41 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -377,10 +377,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I const merged: Record = { ...(userHeaders ?? {}) }; const hasOrgIdAlready = Object.keys(merged).some((k) => k.toLowerCase() === 'x-databricks-org-id'); if (hasOrgIdAlready) { - this.logger.log( - LogLevel.debug, - 'SPOG: x-databricks-org-id supplied by caller; not extracting from httpPath', - ); + this.logger.log(LogLevel.debug, 'SPOG: x-databricks-org-id supplied by caller; not extracting from httpPath'); } else { const orgId = DBSQLClient.extractWorkspaceId(httpPath); if (orgId) { diff --git a/tests/unit/DBSQLClient.test.ts b/tests/unit/DBSQLClient.test.ts index 1563989d..1b04fe7e 100644 --- a/tests/unit/DBSQLClient.test.ts +++ b/tests/unit/DBSQLClient.test.ts @@ -788,27 +788,22 @@ describe('DBSQLClient telemetry paths', () => { }); it('returns the numeric workspace id from all-purpose cluster path form', () => { - expect( - (DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa'), - ).to.equal('99999999999999'); + expect((DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa')).to.equal( + '99999999999999', + ); }); it('returns the numeric workspace id from all-purpose cluster path with leading slash', () => { - expect( - (DBSQLClient as any).extractWorkspaceId('/sql/protocolv1/o/12345/0101-000000-aaaaaaaa'), - ).to.equal('12345'); + expect((DBSQLClient as any).extractWorkspaceId('/sql/protocolv1/o/12345/0101-000000-aaaaaaaa')).to.equal('12345'); }); it('returns undefined when all-purpose cluster path has non-numeric workspace segment', () => { - expect( - (DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/tenant_xyz/0101-000000-aaaaaaaa'), - ).to.be.undefined; + expect((DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/tenant_xyz/0101-000000-aaaaaaaa')).to.be + .undefined; }); it('prefers ?o= query form over /o/ path form when both are present', () => { - expect( - (DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/111/cluster?o=222'), - ).to.equal('222'); + expect((DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/111/cluster?o=222')).to.equal('222'); }); });