From 6b3affd40bd6dfa4054f6d1b47e610a636d73315 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 00:48:48 +0000 Subject: [PATCH 01/11] sea-napi-binding: scaffold native/sea/ crate with version() smoke test Creates the napi-rs binding skeleton: Cargo.toml + lib.rs + module stubs for database/connection/statement/result/error/logger. Captures napi-rs tokio Handle via OnceCell in runtime.rs. Single working #[napi] fn version() proves the binding loads + executes end-to-end in Node. Depends on krn-async-public-api branch (path dep on kernel). Round 2 will add open/execute/fetch methods. --- lib/sea/SeaNativeLoader.ts | 59 +++++++ native/sea/.gitignore | 7 + native/sea/Cargo.toml | 54 ++++++ native/sea/build.rs | 17 ++ native/sea/index.d.ts | 60 +++++++ native/sea/index.js | 317 +++++++++++++++++++++++++++++++++++ native/sea/package.json | 23 +++ native/sea/src/connection.rs | 51 ++++++ native/sea/src/database.rs | 99 +++++++++++ native/sea/src/error.rs | 45 +++++ native/sea/src/lib.rs | 43 +++++ native/sea/src/logger.rs | 17 ++ native/sea/src/result.rs | 18 ++ native/sea/src/runtime.rs | 56 +++++++ native/sea/src/statement.rs | 20 +++ package.json | 2 + tests/native/version.test.ts | 40 +++++ 17 files changed, 928 insertions(+) create mode 100644 lib/sea/SeaNativeLoader.ts create mode 100644 native/sea/.gitignore create mode 100644 native/sea/Cargo.toml create mode 100644 native/sea/build.rs create mode 100644 native/sea/index.d.ts create mode 100644 native/sea/index.js create mode 100644 native/sea/package.json create mode 100644 native/sea/src/connection.rs create mode 100644 native/sea/src/database.rs create mode 100644 native/sea/src/error.rs create mode 100644 native/sea/src/lib.rs create mode 100644 native/sea/src/logger.rs create mode 100644 native/sea/src/result.rs create mode 100644 native/sea/src/runtime.rs create mode 100644 native/sea/src/statement.rs create mode 100644 tests/native/version.test.ts diff --git a/lib/sea/SeaNativeLoader.ts b/lib/sea/SeaNativeLoader.ts new file mode 100644 index 00000000..638ca6dc --- /dev/null +++ b/lib/sea/SeaNativeLoader.ts @@ -0,0 +1,59 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * Loader for the SEA (Statement Execution API) native binding. + * + * Round 1b: minimal pass-through to the napi-rs auto-generated + * `index.js` shim in `native/sea/`. The shim itself picks the right + * per-platform `.node` artifact (linux-x64-gnu today; more triples in + * the bundling feature). + * + * Round 2+ will extend this with: lazy require to defer the `.node` + * load until the first SEA call, structured load-error diagnostics + * (which platform/arch was attempted, whether the package was + * installed at all), and a JS-side `DBSQLLogger` install path that + * forwards to the binding's `installLogger()` once that surface lands. + */ + +// The path is relative to this file at runtime (`dist/sea/SeaNativeLoader.js`) +// resolving to `dist/sea/../../native/sea/index.js` once `tsc` has emitted +// to `dist/`. We use a require-time path resolution because the napi +// shim is plain CommonJS and not part of the TS source tree. +// +// eslint-disable-next-line @typescript-eslint/no-var-requires, import/no-dynamic-require, global-require +const native = require('../../native/sea/index.js'); + +export interface SeaNativeBinding { + /** Returns the native crate version (smoke test for the binding's load path). */ + version(): string; +} + +/** + * Returns the loaded native binding. Throws if the platform-specific + * `.node` artifact cannot be found (napi-rs's auto-generated shim + * surfaces a descriptive error in that case). + */ +export function getSeaNative(): SeaNativeBinding { + return native as SeaNativeBinding; +} + +/** + * Convenience accessor for the smoke-test path. Equivalent to + * `getSeaNative().version()` but reads more naturally in tests and + * REPLs. + */ +export function version(): string { + return getSeaNative().version(); +} diff --git a/native/sea/.gitignore b/native/sea/.gitignore new file mode 100644 index 00000000..92ba58de --- /dev/null +++ b/native/sea/.gitignore @@ -0,0 +1,7 @@ +# Rust build artifacts +target/ +Cargo.lock + +# Platform-specific `.node` binaries are produced per-platform by the +# bundling feature; not committed. +*.node diff --git a/native/sea/Cargo.toml b/native/sea/Cargo.toml new file mode 100644 index 00000000..d5c49046 --- /dev/null +++ b/native/sea/Cargo.toml @@ -0,0 +1,54 @@ +# Copyright (c) 2026 Databricks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[package] +name = "databricks-sea-native" +version = "0.1.0" +edition = "2021" +authors = ["Databricks"] +license = "Apache-2.0" +description = "Databricks SQL Node.js SEA native binding (napi-rs)" +publish = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +# napi-rs v2 line; `napi6` enables N-API 6 surface, `async` enables the +# `#[napi] async fn` glue that drives futures on napi-rs's tokio runtime. +napi = { version = "2", default-features = false, features = ["napi6", "async"] } +napi-derive = "2" + +# Kernel — path dep on the async-public-api branch worktree. Once the +# kernel is published this becomes a version dep. +databricks-sql-kernel = { path = "../../../../databricks-sql-kernel-sea-WT/async-public-api" } + +# Tokio is a transitive dep via the kernel and via napi's `async` feature; +# declared explicitly so we can name `tokio::runtime::Handle` directly. +tokio = { version = "1", default-features = false, features = ["rt"] } + +# Lazy `OnceCell` for the captured tokio Handle. +once_cell = "1" + +# Tracing for kernel + binding diagnostics. The real subscriber is wired +# in Round 3 via the ThreadsafeFunction logger bridge. +tracing = "0.1" +tracing-subscriber = { version = "0.3", default-features = false, features = ["fmt"] } + +[build-dependencies] +napi-build = "2" + +[profile.release] +lto = true +strip = "symbols" diff --git a/native/sea/build.rs b/native/sea/build.rs new file mode 100644 index 00000000..398bb2da --- /dev/null +++ b/native/sea/build.rs @@ -0,0 +1,17 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +fn main() { + napi_build::setup(); +} diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts new file mode 100644 index 00000000..202deddd --- /dev/null +++ b/native/sea/index.d.ts @@ -0,0 +1,60 @@ +/* tslint:disable */ +/* eslint-disable */ + +/* auto-generated by NAPI-RS */ + +/** + * JS-visible connection options. Empty in Round 1b; Round 2 may add + * per-connection scope fields (catalog, schema, session config map). + */ +export interface ConnectionOptions { + +} +/** + * JS-visible constructor options. Round 2 will populate this with + * real fields (host, warehouseId, auth, …); for the scaffold it is + * intentionally empty so the JS smoke test can call `new Database({})` + * without TypeScript complaining about unknown properties. + */ +export interface DatabaseOptions { + /** + * Workspace host URL (e.g. `https://workspace.databricks.com`). + * Optional in Round 1b; Round 2 makes it required. + */ + host?: string + /** Warehouse id. Optional in Round 1b; Round 2 makes it required. */ + warehouseId?: string +} +/** + * Returns the native binding's crate version (`CARGO_PKG_VERSION`). + * + * Acts as the round-1b smoke test: a JS `require()` of the `.node` + * artifact that successfully calls `version()` proves the binding's + * build + load + dispatch path is wired correctly. + */ +export declare function version(): string +/** Opaque connection handle. Round 1b: marker only; no kernel state. */ +export declare class Connection { + /** + * Construct a new connection handle. Round 1b is a no-op shell; + * Round 2 will wire it to `Database`'s `Session` (likely via an + * async `Database::connect()` factory rather than a JS-side + * `new Connection()`). + */ + constructor(options: ConnectionOptions) +} +/** + * Opaque database handle on the JS side. + * + * Holds `Option` so `close()` (Round 2) can `.take()` the + * session out and `.await` an async close, leaving `inner = None`. + * The `Drop` impl checks `inner` to decide whether to schedule a + * fire-and-forget close on the captured tokio runtime. + */ +export declare class Database { + /** + * Construct a new database handle. Round 1b: the options are + * stashed for diagnostic purposes only — no network call. + */ + constructor(options: DatabaseOptions) +} diff --git a/native/sea/index.js b/native/sea/index.js new file mode 100644 index 00000000..6818d29b --- /dev/null +++ b/native/sea/index.js @@ -0,0 +1,317 @@ +/* tslint:disable */ +/* eslint-disable */ +/* prettier-ignore */ + +/* auto-generated by NAPI-RS */ + +const { existsSync, readFileSync } = require('fs') +const { join } = require('path') + +const { platform, arch } = process + +let nativeBinding = null +let localFileExisted = false +let loadError = null + +function isMusl() { + // For Node 10 + if (!process.report || typeof process.report.getReport !== 'function') { + try { + const lddPath = require('child_process').execSync('which ldd').toString().trim() + return readFileSync(lddPath, 'utf8').includes('musl') + } catch (e) { + return true + } + } else { + const { glibcVersionRuntime } = process.report.getReport().header + return !glibcVersionRuntime + } +} + +switch (platform) { + case 'android': + switch (arch) { + case 'arm64': + localFileExisted = existsSync(join(__dirname, 'index.android-arm64.node')) + try { + if (localFileExisted) { + nativeBinding = require('./index.android-arm64.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-android-arm64') + } + } catch (e) { + loadError = e + } + break + case 'arm': + localFileExisted = existsSync(join(__dirname, 'index.android-arm-eabi.node')) + try { + if (localFileExisted) { + nativeBinding = require('./index.android-arm-eabi.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-android-arm-eabi') + } + } catch (e) { + loadError = e + } + break + default: + throw new Error(`Unsupported architecture on Android ${arch}`) + } + break + case 'win32': + switch (arch) { + case 'x64': + localFileExisted = existsSync( + join(__dirname, 'index.win32-x64-msvc.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.win32-x64-msvc.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-win32-x64-msvc') + } + } catch (e) { + loadError = e + } + break + case 'ia32': + localFileExisted = existsSync( + join(__dirname, 'index.win32-ia32-msvc.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.win32-ia32-msvc.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-win32-ia32-msvc') + } + } catch (e) { + loadError = e + } + break + case 'arm64': + localFileExisted = existsSync( + join(__dirname, 'index.win32-arm64-msvc.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.win32-arm64-msvc.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-win32-arm64-msvc') + } + } catch (e) { + loadError = e + } + break + default: + throw new Error(`Unsupported architecture on Windows: ${arch}`) + } + break + case 'darwin': + localFileExisted = existsSync(join(__dirname, 'index.darwin-universal.node')) + try { + if (localFileExisted) { + nativeBinding = require('./index.darwin-universal.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-darwin-universal') + } + break + } catch {} + switch (arch) { + case 'x64': + localFileExisted = existsSync(join(__dirname, 'index.darwin-x64.node')) + try { + if (localFileExisted) { + nativeBinding = require('./index.darwin-x64.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-darwin-x64') + } + } catch (e) { + loadError = e + } + break + case 'arm64': + localFileExisted = existsSync( + join(__dirname, 'index.darwin-arm64.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.darwin-arm64.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-darwin-arm64') + } + } catch (e) { + loadError = e + } + break + default: + throw new Error(`Unsupported architecture on macOS: ${arch}`) + } + break + case 'freebsd': + if (arch !== 'x64') { + throw new Error(`Unsupported architecture on FreeBSD: ${arch}`) + } + localFileExisted = existsSync(join(__dirname, 'index.freebsd-x64.node')) + try { + if (localFileExisted) { + nativeBinding = require('./index.freebsd-x64.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-freebsd-x64') + } + } catch (e) { + loadError = e + } + break + case 'linux': + switch (arch) { + case 'x64': + if (isMusl()) { + localFileExisted = existsSync( + join(__dirname, 'index.linux-x64-musl.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-x64-musl.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-x64-musl') + } + } catch (e) { + loadError = e + } + } else { + localFileExisted = existsSync( + join(__dirname, 'index.linux-x64-gnu.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-x64-gnu.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-x64-gnu') + } + } catch (e) { + loadError = e + } + } + break + case 'arm64': + if (isMusl()) { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm64-musl.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-arm64-musl.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm64-musl') + } + } catch (e) { + loadError = e + } + } else { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm64-gnu.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-arm64-gnu.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm64-gnu') + } + } catch (e) { + loadError = e + } + } + break + case 'arm': + if (isMusl()) { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm-musleabihf.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-arm-musleabihf.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm-musleabihf') + } + } catch (e) { + loadError = e + } + } else { + localFileExisted = existsSync( + join(__dirname, 'index.linux-arm-gnueabihf.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-arm-gnueabihf.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm-gnueabihf') + } + } catch (e) { + loadError = e + } + } + break + case 'riscv64': + if (isMusl()) { + localFileExisted = existsSync( + join(__dirname, 'index.linux-riscv64-musl.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-riscv64-musl.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-riscv64-musl') + } + } catch (e) { + loadError = e + } + } else { + localFileExisted = existsSync( + join(__dirname, 'index.linux-riscv64-gnu.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-riscv64-gnu.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-riscv64-gnu') + } + } catch (e) { + loadError = e + } + } + break + case 's390x': + localFileExisted = existsSync( + join(__dirname, 'index.linux-s390x-gnu.node') + ) + try { + if (localFileExisted) { + nativeBinding = require('./index.linux-s390x-gnu.node') + } else { + nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-s390x-gnu') + } + } catch (e) { + loadError = e + } + break + default: + throw new Error(`Unsupported architecture on Linux: ${arch}`) + } + break + default: + throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`) +} + +if (!nativeBinding) { + if (loadError) { + throw loadError + } + throw new Error(`Failed to load native binding`) +} + +const { Connection, Database, version } = nativeBinding + +module.exports.Connection = Connection +module.exports.Database = Database +module.exports.version = version diff --git a/native/sea/package.json b/native/sea/package.json new file mode 100644 index 00000000..96d116dd --- /dev/null +++ b/native/sea/package.json @@ -0,0 +1,23 @@ +{ + "name": "@databricks/sea-native-linux-x64-gnu", + "version": "0.1.0", + "description": "Databricks SQL Node.js SEA native binding (linux-x64-gnu).", + "main": "index.js", + "types": "index.d.ts", + "files": [ + "index.js", + "index.d.ts", + "*.node" + ], + "license": "Apache-2.0", + "engines": { + "node": ">=14.0.0" + }, + "napi": { + "binaryName": "sea-native", + "targets": [ + "x86_64-unknown-linux-gnu" + ] + }, + "private": true +} diff --git a/native/sea/src/connection.rs b/native/sea/src/connection.rs new file mode 100644 index 00000000..ad9df612 --- /dev/null +++ b/native/sea/src/connection.rs @@ -0,0 +1,51 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Opaque `Connection` wrapper. +//! +//! Round 1b: scaffold only. The kernel collapses ADBC's per-connection +//! state into the `Session` handle held by `Database` (see +//! `database.rs`). The JS-side `Connection` exists for API parity with +//! the existing Node driver but is currently a thin marker; Round 2 +//! decides whether to keep it as a pass-through on `Database` or to +//! attach per-connection scoping (e.g. default catalog/schema overrides). + +/// JS-visible connection options. Empty in Round 1b; Round 2 may add +/// per-connection scope fields (catalog, schema, session config map). +#[napi(object)] +pub struct ConnectionOptions {} + +/// Opaque connection handle. Round 1b: marker only; no kernel state. +#[napi] +pub struct Connection {} + +#[napi] +impl Connection { + /// Construct a new connection handle. Round 1b is a no-op shell; + /// Round 2 will wire it to `Database`'s `Session` (likely via an + /// async `Database::connect()` factory rather than a JS-side + /// `new Connection()`). + #[napi(constructor)] + pub fn new(_options: ConnectionOptions) -> Self { + Connection {} + } +} + +impl Drop for Connection { + fn drop(&mut self) { + // Round 1b: nothing to clean up. Round 2 will populate this + // with the same `runtime::get_handle().spawn(...)` pattern as + // `Database::drop`. + } +} diff --git a/native/sea/src/database.rs b/native/sea/src/database.rs new file mode 100644 index 00000000..800ca090 --- /dev/null +++ b/native/sea/src/database.rs @@ -0,0 +1,99 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Opaque `Database` wrapper around the kernel's `Session` handle. +//! +//! Round 1b: scaffold only — `constructor` stores options and returns +//! immediately. Round 2 will add `open()` (calling `Session::open`), +//! `statement()`, `close()`, etc. +//! +//! The kernel collapses ADBC's `Database` + `Connection` into a single +//! `Session`. We keep the wrapper name `Database` on the JS side +//! because that matches the existing Node driver's mental model; the +//! actual session lives inside this struct. + +use databricks_sql_kernel::Session; + +use crate::runtime; + +/// JS-visible constructor options. Round 2 will populate this with +/// real fields (host, warehouseId, auth, …); for the scaffold it is +/// intentionally empty so the JS smoke test can call `new Database({})` +/// without TypeScript complaining about unknown properties. +#[napi(object)] +pub struct DatabaseOptions { + /// Workspace host URL (e.g. `https://workspace.databricks.com`). + /// Optional in Round 1b; Round 2 makes it required. + pub host: Option, + /// Warehouse id. Optional in Round 1b; Round 2 makes it required. + pub warehouse_id: Option, +} + +/// Opaque database handle on the JS side. +/// +/// Holds `Option` so `close()` (Round 2) can `.take()` the +/// session out and `.await` an async close, leaving `inner = None`. +/// The `Drop` impl checks `inner` to decide whether to schedule a +/// fire-and-forget close on the captured tokio runtime. +#[napi] +pub struct Database { + // TODO(round-2): populate this from `Session::open(config).await` + // inside an `open()` async method (or directly inside the + // constructor via a factory pattern). For now it stays `None` so + // Drop has nothing to clean up. + inner: Option, +} + +#[napi] +impl Database { + /// Construct a new database handle. Round 1b: the options are + /// stashed for diagnostic purposes only — no network call. + #[napi(constructor)] + pub fn new(_options: DatabaseOptions) -> Self { + Database { inner: None } + } +} + +impl Drop for Database { + fn drop(&mut self) { + // Pattern #5 from the napi-rs patterns doc: spawn cleanup on + // the captured runtime handle. We only enter this branch if + // the JS user dropped the handle without calling `close()` + // first (which Round 2 will provide). For Round 1b there is + // nothing to clean up, but the pattern is in place so the + // Round-2 work is a one-line addition. + let Some(session) = self.inner.take() else { + return; + }; + let Some(handle) = runtime::try_get_handle() else { + // No async entry point has ever run, so there cannot be a + // live `Session` either — but the destructor of `Session` + // itself uses the kernel's own borrowed handle, so we + // simply let it run. + drop(session); + return; + }; + // The kernel's `SessionInner::Drop` already spawns a + // fire-and-forget `delete_session` on its own captured runtime + // handle. To stay on napi-rs's runtime explicitly (so Round 2 + // can add binding-side cleanup steps before the kernel drop), + // hop onto a tokio task and let the kernel destructor run + // there. We do NOT call `Session::close().await` because that + // method enters a tracing span (`EnteredSpan` is `!Send`) and + // therefore cannot cross an `await` boundary inside a `spawn`. + handle.spawn(async move { + drop(session); + }); + } +} diff --git a/native/sea/src/error.rs b/native/sea/src/error.rs new file mode 100644 index 00000000..fc82a0b4 --- /dev/null +++ b/native/sea/src/error.rs @@ -0,0 +1,45 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Minimal kernel-error → `napi::Error` mapping. +//! +//! Round 1b: just preserves the kernel error message and translates +//! the kernel's [`ErrorCode`] into a small set of napi statuses. Round +//! 2 will add a full taxonomy (sqlState, vendorCode, retryable, …) +//! attached as own-properties on the JS error object via +//! `Env::create_error` (pattern #7 in the napi-rs patterns doc). + +use databricks_sql_kernel::{Error as KernelError, ErrorCode}; +use napi::{Error as NapiError, Status}; + +/// Map a kernel `Error` into a `napi::Error`. The kernel `ErrorCode` +/// is used to pick a sensible napi `Status`; the kernel message is +/// preserved verbatim as the error reason. +/// +/// Round 1b has no callers — the scaffold doesn't return any kernel +/// errors yet. Round 2's `Database::open()` is the first consumer. +#[allow(dead_code)] +pub(crate) fn napi_err_from_kernel(e: KernelError) -> NapiError { + let status = match e.code { + ErrorCode::InvalidArgument | ErrorCode::InvalidStatementHandle => { + Status::InvalidArg + } + ErrorCode::Cancelled => Status::Cancelled, + // Everything else collapses to `GenericFailure`; Round 2 + // refines this with sqlState / vendorCode / category own- + // properties on a JS error object. + _ => Status::GenericFailure, + }; + NapiError::new(status, e.message) +} diff --git a/native/sea/src/lib.rs b/native/sea/src/lib.rs new file mode 100644 index 00000000..7d76cf9b --- /dev/null +++ b/native/sea/src/lib.rs @@ -0,0 +1,43 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! `databricks-sea-native` — napi-rs binding crate for the Databricks +//! SQL Node.js driver's SEA (Statement Execution API) path. +//! +//! Round 1b scaffold: module skeletons + a single working `version()` +//! `#[napi]` function that proves the binding loads end-to-end. Round 2 +//! adds `Database::open` / `Statement::execute` / fetch / cancel. + +#![deny(unsafe_op_in_unsafe_fn)] + +#[macro_use] +extern crate napi_derive; + +pub(crate) mod connection; +pub(crate) mod database; +pub(crate) mod error; +pub(crate) mod logger; +pub(crate) mod result; +pub(crate) mod runtime; +pub(crate) mod statement; + +/// Returns the native binding's crate version (`CARGO_PKG_VERSION`). +/// +/// Acts as the round-1b smoke test: a JS `require()` of the `.node` +/// artifact that successfully calls `version()` proves the binding's +/// build + load + dispatch path is wired correctly. +#[napi] +pub fn version() -> String { + env!("CARGO_PKG_VERSION").to_string() +} diff --git a/native/sea/src/logger.rs b/native/sea/src/logger.rs new file mode 100644 index 00000000..2bfcd078 --- /dev/null +++ b/native/sea/src/logger.rs @@ -0,0 +1,17 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! `tracing` → JS `DBSQLLogger` bridge via `ThreadsafeFunction`. +//! +//! Round 3 work. Empty in Round 1b. diff --git a/native/sea/src/result.rs b/native/sea/src/result.rs new file mode 100644 index 00000000..f406c363 --- /dev/null +++ b/native/sea/src/result.rs @@ -0,0 +1,18 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! ResultStream wrapper. +//! +//! Round 2 work. Empty in Round 1b — see `statement.rs` for the same +//! reasoning. diff --git a/native/sea/src/runtime.rs b/native/sea/src/runtime.rs new file mode 100644 index 00000000..7f0ee42d --- /dev/null +++ b/native/sea/src/runtime.rs @@ -0,0 +1,56 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Captured tokio `Handle` for napi-rs's process-global runtime. +//! +//! Per the napi-rs patterns doc (pattern #2): the first time any +//! `#[napi] async fn` runs, we are guaranteed to be on napi-rs's tokio +//! runtime. We snapshot the current `Handle` then and stash a clone in +//! a process-static `OnceCell`. Every subsequent kernel construction +//! reads the captured handle and hands a clone to the kernel, so +//! Drop-time cleanup (which runs on the V8 GC thread, *outside* any +//! tokio context) can still `spawn` cleanup tasks onto the same +//! runtime napi-rs is driving. +//! +//! `Handle::current()` MUST NOT be called from a synchronous JS-thread +//! entry point or from module init — both run before napi-rs has +//! constructed its runtime and would panic. `get()` returns `None` in +//! that case so callers can surface a useful error rather than abort. + +use once_cell::sync::OnceCell; +use tokio::runtime::Handle; + +static RUNTIME_HANDLE: OnceCell = OnceCell::new(); + +/// Capture the current tokio runtime handle on first call, return a +/// reference to the captured clone on subsequent calls. +/// +/// MUST be called from inside a `#[napi] async fn` body (or any other +/// tokio runtime context); otherwise `Handle::current()` panics on the +/// very first call. Subsequent calls are infallible and lock-free. +/// +/// Round 1b has no async entry points that exercise this yet; Round 2 +/// will call it from `Database::open()` and other `#[napi] async fn`s. +#[allow(dead_code)] +pub(crate) fn get_handle() -> &'static Handle { + RUNTIME_HANDLE.get_or_init(Handle::current) +} + +/// Non-panicking accessor — returns `None` if `get_handle()` has not +/// been called yet. Drop impls and other GC-thread call sites use this +/// to short-circuit cleanup when no async entry point has ever run +/// (i.e. there is no kernel state that needs closing either). +pub(crate) fn try_get_handle() -> Option<&'static Handle> { + RUNTIME_HANDLE.get() +} diff --git a/native/sea/src/statement.rs b/native/sea/src/statement.rs new file mode 100644 index 00000000..c449b402 --- /dev/null +++ b/native/sea/src/statement.rs @@ -0,0 +1,20 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Statement / ExecutedStatement wrappers. +//! +//! Round 2 work. This module is intentionally empty in Round 1b — no +//! `#[napi]` types here yet. Adding empty stubs would require +//! `napi-rs` to generate JS bindings for them, which adds noise to the +//! `index.d.ts` without any callable surface. diff --git a/package.json b/package.json index e430181f..14d4d200 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,8 @@ "test": "nyc --report-dir=${NYC_REPORT_DIR:-coverage_unit} mocha --config tests/unit/.mocharc.js", "update-version": "node bin/update-version.js && prettier --write ./lib/version.ts", "build": "npm run update-version && tsc --project tsconfig.build.json", + "build:native": "cd native/sea && napi build --platform --release", + "build:native:debug": "cd native/sea && napi build --platform", "watch": "tsc --project tsconfig.build.json --watch", "type-check": "tsc --noEmit", "prettier": "prettier . --check", diff --git a/tests/native/version.test.ts b/tests/native/version.test.ts new file mode 100644 index 00000000..03210c3c --- /dev/null +++ b/tests/native/version.test.ts @@ -0,0 +1,40 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { version, getSeaNative } from '../../lib/sea/SeaNativeLoader'; + +describe('SEA native binding — smoke test', () => { + it('loads the .node artifact and returns version()', () => { + const v = version(); + // Round 1b: the native crate is at 0.1.0. Match the shape rather + // than the literal so the test does not need updating on every + // version bump. + expect(v).to.match(/^\d+\.\d+\.\d+$/); + }); + + it('exposes the Database opaque class', () => { + const binding = getSeaNative() as unknown as { Database: new (opts: object) => object }; + expect(typeof binding.Database).to.equal('function'); + const db = new binding.Database({}); + expect(db).to.be.an('object'); + }); + + it('exposes the Connection opaque class', () => { + const binding = getSeaNative() as unknown as { Connection: new (opts: object) => object }; + expect(typeof binding.Connection).to.equal('function'); + const conn = new binding.Connection({}); + expect(conn).to.be.an('object'); + }); +}); From f7cdb801a7d26aec5e0c7bcc6a02b3def9031478 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 01:02:43 +0000 Subject: [PATCH 02/11] =?UTF-8?q?sea-errors-logging:=20kernel=20ErrorCode?= =?UTF-8?q?=20=E2=86=92=20JS=20error=20class=20mapping?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single mapping function in lib/sea/SeaErrorMapping.ts converts the napi-binding's surfaced kernel error (code+message+sqlstate) to the appropriate existing JS error class. M0 minimum: PAT auth errors land as AuthenticationError; cancel/timeout as OperationStateError; network/internal as HiveDriverError. SQLSTATE preserved on the error object via .sqlState property. No new error classes. M1 may add nuance. --- lib/sea/SeaErrorMapping.ts | 141 +++++++++++++++++ tests/unit/sea/error-mapping.test.ts | 227 +++++++++++++++++++++++++++ 2 files changed, 368 insertions(+) create mode 100644 lib/sea/SeaErrorMapping.ts create mode 100644 tests/unit/sea/error-mapping.test.ts diff --git a/lib/sea/SeaErrorMapping.ts b/lib/sea/SeaErrorMapping.ts new file mode 100644 index 00000000..7e8a5534 --- /dev/null +++ b/lib/sea/SeaErrorMapping.ts @@ -0,0 +1,141 @@ +import HiveDriverError from '../errors/HiveDriverError'; +import AuthenticationError from '../errors/AuthenticationError'; +import OperationStateError, { OperationStateErrorCode } from '../errors/OperationStateError'; +import ParameterError from '../errors/ParameterError'; + +/** + * Shape of the kernel error surfaced by the napi-binding's `napi_err_from_kernel`. + * + * The Rust kernel's `kernel_error::Error` is exposed as a `JsError` whose + * properties mirror the Rust struct: the `ErrorCode` variant name (as a string), + * the message, and an optional SQLSTATE (either taken from the structured + * server response or recovered via `extract_sqlstate_from_message`). + */ +export interface KernelErrorShape { + /** Kernel `ErrorCode` variant name, e.g. `"Unauthenticated"`, `"SqlError"`. */ + code: string; + /** Human-readable error message. */ + message: string; + /** Optional SQLSTATE — five-char alphanumeric, when the kernel was able to surface it. */ + sqlstate?: string; +} + +/** + * Kernel `ErrorCode` variants — the 13 variants of the `#[non_exhaustive]` enum + * defined in `src/kernel_error.rs:66-134`. + * + * Kept here as a literal type rather than an `enum` so test exhaustiveness checks + * and runtime `code` strings are guaranteed to stay in lockstep with the kernel. + */ +export type KernelErrorCode = + | 'InvalidArgument' + | 'Unauthenticated' + | 'PermissionDenied' + | 'NotFound' + | 'ResourceExhausted' + | 'Unavailable' + | 'Timeout' + | 'Cancelled' + | 'DataLoss' + | 'Internal' + | 'InvalidStatementHandle' + | 'NetworkError' + | 'SqlError'; + +/** + * An `Error` with a preserved SQLSTATE on the `sqlState` property. Used as the + * narrowed return type of {@link mapKernelErrorToJsError} so callers that need + * the SQLSTATE can `error.sqlState` without an `any` cast. + */ +export interface ErrorWithSqlState extends Error { + sqlState?: string; +} + +/** + * Attach the kernel's SQLSTATE to the JS error object via the `sqlState` property. + * The driver has no pre-existing `sqlState` convention (no other error class + * sets it today) so this single helper defines it for the SEA path. + */ +function attachSqlState(error: ErrorWithSqlState, sqlstate?: string): ErrorWithSqlState { + if (sqlstate !== undefined) { + // Using Object.defineProperty so the property is non-enumerable but still + // visible via direct access — matches the way Node attaches `.code` to system errors. + Object.defineProperty(error, 'sqlState', { + value: sqlstate, + writable: true, + enumerable: false, + configurable: true, + }); + } + return error; +} + +/** + * Map a kernel error (as surfaced by the napi-binding) to the appropriate JS + * driver error class. + * + * M0 mapping table: + * Unauthenticated, PermissionDenied → AuthenticationError + * Cancelled → OperationStateError(Canceled) + * Timeout → OperationStateError(Timeout) + * InvalidArgument → ParameterError + * NetworkError, Unavailable, + * NotFound, ResourceExhausted, + * DataLoss, Internal, + * InvalidStatementHandle, SqlError → HiveDriverError + * + * Unknown `code` values (e.g. if the kernel adds a new variant) fall through + * to HiveDriverError so the driver never silently drops an error. The kernel's + * `ErrorCode` is `#[non_exhaustive]` so this can legitimately happen. + * + * SQLSTATE, when present, is attached on `error.sqlState` regardless of which + * class is returned. + */ +export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlState { + const { code, message, sqlstate } = kErr; + + let error: ErrorWithSqlState; + + switch (code as KernelErrorCode) { + case 'Unauthenticated': + case 'PermissionDenied': + error = new AuthenticationError(message); + break; + + case 'Cancelled': + // OperationStateError with the Canceled code carries the kernel message + // through the response.displayMessage fallback path. + error = new OperationStateError(OperationStateErrorCode.Canceled); + error.message = message; + break; + + case 'Timeout': + error = new OperationStateError(OperationStateErrorCode.Timeout); + error.message = message; + break; + + case 'InvalidArgument': + error = new ParameterError(message); + break; + + // All remaining kernel ErrorCode variants map to the base driver error class. + // M0 intentionally does not introduce new error classes; M1 may add nuance. + case 'NotFound': + case 'ResourceExhausted': + case 'Unavailable': + case 'DataLoss': + case 'Internal': + case 'InvalidStatementHandle': + case 'NetworkError': + case 'SqlError': + error = new HiveDriverError(message); + break; + + default: + // Unknown/future kernel variant — never drop the error, surface as base class. + error = new HiveDriverError(message); + break; + } + + return attachSqlState(error, sqlstate); +} diff --git a/tests/unit/sea/error-mapping.test.ts b/tests/unit/sea/error-mapping.test.ts new file mode 100644 index 00000000..8331bc57 --- /dev/null +++ b/tests/unit/sea/error-mapping.test.ts @@ -0,0 +1,227 @@ +import { expect } from 'chai'; +import { + mapKernelErrorToJsError, + KernelErrorCode, + KernelErrorShape, +} from '../../../lib/sea/SeaErrorMapping'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; +import AuthenticationError from '../../../lib/errors/AuthenticationError'; +import OperationStateError, { + OperationStateErrorCode, +} from '../../../lib/errors/OperationStateError'; +import ParameterError from '../../../lib/errors/ParameterError'; + +describe('SeaErrorMapping.mapKernelErrorToJsError', () => { + // The 13 kernel ErrorCode variants — kept in sync with src/kernel_error.rs:66-134. + // Tabular driver: each row is (kernel code, expected class, optional extra assertion). + type Case = { + code: KernelErrorCode; + expectedClass: Function; + extra?: (err: Error) => void; + }; + + const cases: Array = [ + { + code: 'InvalidArgument', + expectedClass: ParameterError, + }, + { + code: 'Unauthenticated', + expectedClass: AuthenticationError, + }, + { + code: 'PermissionDenied', + expectedClass: AuthenticationError, + }, + { + code: 'NotFound', + expectedClass: HiveDriverError, + }, + { + code: 'ResourceExhausted', + expectedClass: HiveDriverError, + }, + { + code: 'Unavailable', + expectedClass: HiveDriverError, + }, + { + code: 'Timeout', + expectedClass: OperationStateError, + extra: (err) => { + expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Timeout); + }, + }, + { + code: 'Cancelled', + expectedClass: OperationStateError, + extra: (err) => { + expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Canceled); + }, + }, + { + code: 'DataLoss', + expectedClass: HiveDriverError, + }, + { + code: 'Internal', + expectedClass: HiveDriverError, + }, + { + code: 'InvalidStatementHandle', + expectedClass: HiveDriverError, + }, + { + code: 'NetworkError', + expectedClass: HiveDriverError, + }, + { + code: 'SqlError', + expectedClass: HiveDriverError, + }, + ]; + + it('covers all 13 kernel ErrorCode variants', () => { + // Guardrail: if the kernel adds a variant, KernelErrorCode in TS will gain + // a literal — this test then fails because the new variant has no case row. + // (Drift is caught at the test level since the union itself is an inline literal.) + expect(cases).to.have.lengthOf(13); + }); + + cases.forEach(({ code, expectedClass, extra }) => { + it(`maps ${code} to ${expectedClass.name}`, () => { + const kErr: KernelErrorShape = { + code, + message: `kernel ${code} message`, + }; + + const err = mapKernelErrorToJsError(kErr); + + expect(err).to.be.instanceOf(expectedClass); + expect(err.message).to.equal(`kernel ${code} message`); + if (extra) { + extra(err); + } + }); + }); + + describe('SQLSTATE preservation', () => { + it('attaches sqlState when present on the kernel error', () => { + const err = mapKernelErrorToJsError({ + code: 'SqlError', + message: 'syntax error', + sqlstate: '42000', + }); + + expect(err).to.be.instanceOf(HiveDriverError); + expect(err.sqlState).to.equal('42000'); + }); + + it('does not set sqlState when absent', () => { + const err = mapKernelErrorToJsError({ + code: 'Internal', + message: 'boom', + }); + + expect(err.sqlState).to.be.undefined; + }); + + it('preserves sqlState on AuthenticationError', () => { + const err = mapKernelErrorToJsError({ + code: 'Unauthenticated', + message: 'invalid token', + sqlstate: '28000', + }); + + expect(err).to.be.instanceOf(AuthenticationError); + expect(err.sqlState).to.equal('28000'); + }); + + it('preserves sqlState on OperationStateError', () => { + const err = mapKernelErrorToJsError({ + code: 'Timeout', + message: 'deadline exceeded', + sqlstate: 'HYT01', + }); + + expect(err).to.be.instanceOf(OperationStateError); + expect((err as OperationStateError).errorCode).to.equal(OperationStateErrorCode.Timeout); + expect(err.sqlState).to.equal('HYT01'); + }); + + it('preserves sqlState on ParameterError', () => { + const err = mapKernelErrorToJsError({ + code: 'InvalidArgument', + message: 'bad param', + sqlstate: 'HY009', + }); + + expect(err).to.be.instanceOf(ParameterError); + expect(err.sqlState).to.equal('HY009'); + }); + + it('attaches sqlState as a non-enumerable property', () => { + const err = mapKernelErrorToJsError({ + code: 'SqlError', + message: 'oops', + sqlstate: '42000', + }); + + const descriptor = Object.getOwnPropertyDescriptor(err, 'sqlState'); + expect(descriptor).to.exist; + expect(descriptor!.enumerable).to.equal(false); + expect(descriptor!.writable).to.equal(true); + expect(descriptor!.configurable).to.equal(true); + }); + }); + + describe('unknown / future kernel codes', () => { + it('falls back to HiveDriverError for an unrecognised code', () => { + const err = mapKernelErrorToJsError({ + code: 'SomeFutureVariantThatDoesNotExist', + message: 'forward-compat message', + }); + + // Never silently drop — must surface as the base driver class. + expect(err).to.be.instanceOf(HiveDriverError); + expect(err.message).to.equal('forward-compat message'); + }); + + it('still preserves sqlState on a fallback HiveDriverError', () => { + const err = mapKernelErrorToJsError({ + code: 'BrandNewVariant', + message: 'with sqlstate', + sqlstate: '01004', + }); + + expect(err).to.be.instanceOf(HiveDriverError); + expect(err.sqlState).to.equal('01004'); + }); + }); + + describe('returned errors compose with try/catch', () => { + it('thrown errors are catchable as Error', () => { + function thrower() { + throw mapKernelErrorToJsError({ code: 'Internal', message: 'kaboom' }); + } + + expect(thrower).to.throw(Error, 'kaboom'); + expect(thrower).to.throw(HiveDriverError, 'kaboom'); + }); + + it('AuthenticationError thrown is also instanceOf HiveDriverError', () => { + // AuthenticationError extends HiveDriverError — preserve that hierarchy. + const err = mapKernelErrorToJsError({ code: 'Unauthenticated', message: 'nope' }); + expect(err).to.be.instanceOf(AuthenticationError); + expect(err).to.be.instanceOf(HiveDriverError); + expect(err).to.be.instanceOf(Error); + }); + + it('ParameterError does NOT extend HiveDriverError (matches existing class hierarchy)', () => { + const err = mapKernelErrorToJsError({ code: 'InvalidArgument', message: 'bad' }); + expect(err).to.be.instanceOf(ParameterError); + expect(err).to.not.be.instanceOf(HiveDriverError); + expect(err).to.be.instanceOf(Error); + }); + }); +}); From 40d0b57784c5eed2b73dec59750da4bb3df7a26d Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 01:21:30 +0000 Subject: [PATCH 03/11] sea-napi-binding: Database/Connection/Statement/ResultStream methods wired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds real async methods on the opaque wrappers backing M0: - openSession (free function) with PAT → kernel Session - Connection::execute_statement → kernel ExecutedStatement - Statement::fetch_next_batch / schema / cancel / close → kernel ResultStream - Arrow batches returned as IPC bytes (per Layer 2 design) - Error mapping preserves kernel ErrorCode + SQLSTATE for TS layer - All entry points wrapped in catch_unwind End-to-end smoke test against pecotesting passes. No new dependencies beyond arrow-{ipc,array,schema} + futures. Uses kernel async public API (no block_on). Co-authored-by: Isaac --- lib/sea/SeaNativeLoader.ts | 22 ++++ native/sea/Cargo.toml | 19 +++- native/sea/index.d.ts | 148 ++++++++++++++++++------ native/sea/index.js | 5 +- native/sea/src/connection.rs | 161 ++++++++++++++++++++++---- native/sea/src/database.rs | 135 ++++++++++------------ native/sea/src/error.rs | 152 +++++++++++++++++++++---- native/sea/src/lib.rs | 13 ++- native/sea/src/result.rs | 24 +++- native/sea/src/statement.rs | 202 ++++++++++++++++++++++++++++++++- native/sea/src/util.rs | 62 ++++++++++ tests/native/e2e-smoke.test.ts | 106 +++++++++++++++++ tests/native/version.test.ts | 20 ++-- 13 files changed, 891 insertions(+), 178 deletions(-) create mode 100644 native/sea/src/util.rs create mode 100644 tests/native/e2e-smoke.test.ts diff --git a/lib/sea/SeaNativeLoader.ts b/lib/sea/SeaNativeLoader.ts index 638ca6dc..c66cdf33 100644 --- a/lib/sea/SeaNativeLoader.ts +++ b/lib/sea/SeaNativeLoader.ts @@ -35,9 +35,31 @@ // eslint-disable-next-line @typescript-eslint/no-var-requires, import/no-dynamic-require, global-require const native = require('../../native/sea/index.js'); +/** + * Public surface of the native binding exposed to the rest of the + * NodeJS driver. Round 2 lands `openSession` + opaque `Connection` / + * `Statement` classes (the binding-generated `.d.ts` is the source of + * truth for their method signatures — see `native/sea/index.d.ts`). + * + * We deliberately keep this typed loosely (`unknown` for the class + * shapes) so the loader layer doesn't have to import the binding's + * generated types and the JS adapter layer can introduce its own + * higher-level wrappers without conflicting with the binding's TS + * declarations. + */ export interface SeaNativeBinding { /** Returns the native crate version (smoke test for the binding's load path). */ version(): string; + /** Open a session over PAT auth. Returns an opaque Connection. */ + openSession(opts: { + hostName: string; + httpPath: string; + token: string; + }): Promise; + /** Opaque Connection class — instance methods on the binding-generated d.ts. */ + Connection: Function; + /** Opaque Statement class — instance methods on the binding-generated d.ts. */ + Statement: Function; } /** diff --git a/native/sea/Cargo.toml b/native/sea/Cargo.toml index d5c49046..c69fb93a 100644 --- a/native/sea/Cargo.toml +++ b/native/sea/Cargo.toml @@ -35,8 +35,9 @@ napi-derive = "2" databricks-sql-kernel = { path = "../../../../databricks-sql-kernel-sea-WT/async-public-api" } # Tokio is a transitive dep via the kernel and via napi's `async` feature; -# declared explicitly so we can name `tokio::runtime::Handle` directly. -tokio = { version = "1", default-features = false, features = ["rt"] } +# declared explicitly so we can name `tokio::runtime::Handle` and +# `tokio::sync::Mutex` directly. +tokio = { version = "1", default-features = false, features = ["rt", "sync"] } # Lazy `OnceCell` for the captured tokio Handle. once_cell = "1" @@ -46,6 +47,20 @@ once_cell = "1" tracing = "0.1" tracing-subscriber = { version = "0.3", default-features = false, features = ["fmt"] } +# `catch_unwind` wrapper around async futures (pattern #8 of the +# napi-rs patterns doc). Transitively a dep of the kernel already, but +# declared here so we can `use FutureExt;` directly. +futures = { version = "0.3", default-features = false, features = ["std"] } + +# Arrow IPC encoding of result batches across the napi boundary. +# `arrow-array` / `arrow-schema` come in via the kernel's public types +# (`RecordBatch`, `SchemaRef`); `arrow-ipc` is for the `StreamWriter` +# we use on the encode side. Versions kept in lock-step with the +# kernel's `arrow-*` deps to avoid two arrow versions in the dep graph. +arrow-array = "57" +arrow-schema = "57" +arrow-ipc = "57" + [build-dependencies] napi-build = "2" diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts index 202deddd..5fb5e902 100644 --- a/native/sea/index.d.ts +++ b/native/sea/index.d.ts @@ -4,57 +4,141 @@ /* auto-generated by NAPI-RS */ /** - * JS-visible connection options. Empty in Round 1b; Round 2 may add - * per-connection scope fields (catalog, schema, session config map). + * JS-visible per-execute options. M0 only carries + * initialCatalog / initialSchema / sessionConfig — parameters and + * per-statement overrides land in M1. */ -export interface ConnectionOptions { - +export interface ExecuteOptions { + /** Default catalog applied to this statement via session conf. */ + initialCatalog?: string + /** Default schema applied to this statement via session conf. */ + initialSchema?: string + /** + * Per-statement session conf overrides (forwarded to SEA + * `parameters` / Thrift `confOverlay`). + */ + sessionConfig?: Record } /** - * JS-visible constructor options. Round 2 will populate this with - * real fields (host, warehouseId, auth, …); for the scaffold it is - * intentionally empty so the JS smoke test can call `new Database({})` - * without TypeScript complaining about unknown properties. + * JS-visible options for opening a Databricks SQL session over PAT. + * + * M0 supports PAT only — `token` is required. OAuth M2M / U2M variants + * land in M1 along with a discriminated-union shape on the JS side. */ -export interface DatabaseOptions { +export interface ConnectionOptions { + /** + * Workspace host, e.g. `adb-…azuredatabricks.net`. The kernel + * normalises this — bare hostnames get `https://` prepended. + */ + hostName: string /** - * Workspace host URL (e.g. `https://workspace.databricks.com`). - * Optional in Round 1b; Round 2 makes it required. + * JDBC-style HTTP path, e.g. `/sql/1.0/warehouses/abc123`. The + * kernel parses out the warehouse id. */ - host?: string - /** Warehouse id. Optional in Round 1b; Round 2 makes it required. */ - warehouseId?: string + httpPath: string + /** + * Personal access token. Must be non-empty (the kernel rejects + * empty PATs at session construction). + */ + token: string +} +/** + * Open a Databricks SQL session over PAT auth and return an opaque + * `Connection` wrapping the kernel `Session`. + * + * The JS-visible name is `openSession` (napi-rs converts snake_case + * to camelCase for free functions). + */ +export declare function openSession(options: ConnectionOptions): Promise +/** + * A single Arrow IPC stream payload encoding one record batch (plus + * the schema header so the JS-side reader is stateless). + */ +export interface ArrowBatch { + ipcBytes: Buffer +} +/** + * An Arrow IPC stream payload encoding just the result schema (no + * record-batch messages). Returned by `Statement.schema()`. + */ +export interface ArrowSchema { + ipcBytes: Buffer } /** * Returns the native binding's crate version (`CARGO_PKG_VERSION`). * - * Acts as the round-1b smoke test: a JS `require()` of the `.node` - * artifact that successfully calls `version()` proves the binding's - * build + load + dispatch path is wired correctly. + * Originally the round-1b smoke test; kept as a cheap "is the binding + * loaded?" probe for the JS-side loader's structured diagnostics. */ export declare function version(): string -/** Opaque connection handle. Round 1b: marker only; no kernel state. */ +/** + * Opaque connection handle wrapping a kernel `Session`. + * + * `inner` is `Arc>>` so: + * - the Drop impl can clone the `Arc` and `.take()` the session on a + * background tokio task without holding `&mut self` (which Drop is + * forbidden from doing across an `await`), + * - `executeStatement` can share immutable access to the session via + * the `Arc` clones the kernel makes internally + * (`Session::statement()` only needs `&self`). + */ export declare class Connection { /** - * Construct a new connection handle. Round 1b is a no-op shell; - * Round 2 will wire it to `Database`'s `Session` (likely via an - * async `Database::connect()` factory rather than a JS-side - * `new Connection()`). + * Execute a SQL statement and return a Statement handle that + * streams batches via `fetchNextBatch()`. */ - constructor(options: ConnectionOptions) + executeStatement(sql: string, options: ExecuteOptions): Promise + /** + * Explicit close. Marks the connection wrapper as closed so + * subsequent calls on this `Connection` return `InvalidArg`, then + * schedules a fire-and-forget server-side close on the runtime. + * + * **Why fire-and-forget and not `Session::close().await`:** the + * kernel's `Session::close(self).await` body holds a + * `tracing::EnteredSpan` (a `!Send` type) across an `.await`, so + * the future is not `Send`. napi-rs's `execute_tokio_future` glue + * rejects non-`Send` futures, and `Handle::spawn` does too. The + * kernel's `SessionInner::Drop` already spawns the + * `delete_session` RPC on the same runtime handle the napi + * binding captured, so dropping the value is functionally + * equivalent — the difference is that JS callers can't observe a + * `delete_session` failure from `close()`. Tracked as a kernel- + * side follow-up (clone the span rather than entering it) in + * Round 3 findings. + */ + close(): Promise } /** - * Opaque database handle on the JS side. + * Opaque executed-statement handle. * - * Holds `Option` so `close()` (Round 2) can `.take()` the - * session out and `.await` an async close, leaving `inner = None`. - * The `Drop` impl checks `inner` to decide whether to schedule a - * fire-and-forget close on the captured tokio runtime. + * `inner` is wrapped in `Arc>>` so: + * - `fetch_next_batch` can `await` `ResultStream::next_batch` which + * requires `&mut ExecutedStatement` (via `result_stream_mut`), + * - `cancel` / `close` (which take `&self` on the kernel side via the + * `ExecutedStatementHandle` trait) can run concurrently with each + * other from a JS perspective without panicking, + * - `Drop` can hand the inner handle off to a tokio task without + * touching `&mut self` across an `await`. */ -export declare class Database { +export declare class Statement { + /** + * Pull the next batch of results. Returns `None` when the stream + * is exhausted. The returned `ArrowBatch.ipcBytes` is a complete + * Arrow IPC stream (schema header + 1 record-batch message) + * suitable for handing to `apache-arrow`'s `RecordBatchReader`. + */ + fetchNextBatch(): Promise + /** + * Result schema as an Arrow IPC payload (schema header only, no + * record-batch message). Available before any batches have been + * fetched. + */ + schema(): Promise + /** Server-side cancel. No-op if already finished. */ + cancel(): Promise /** - * Construct a new database handle. Round 1b: the options are - * stashed for diagnostic purposes only — no network call. + * Explicit close. Awaits the server-side close so the JS caller + * can observe failures. */ - constructor(options: DatabaseOptions) + close(): Promise } diff --git a/native/sea/index.js b/native/sea/index.js index 6818d29b..c7551305 100644 --- a/native/sea/index.js +++ b/native/sea/index.js @@ -310,8 +310,9 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { Connection, Database, version } = nativeBinding +const { Connection, openSession, Statement, version } = nativeBinding module.exports.Connection = Connection -module.exports.Database = Database +module.exports.openSession = openSession +module.exports.Statement = Statement module.exports.version = version diff --git a/native/sea/src/connection.rs b/native/sea/src/connection.rs index ad9df612..4afbd724 100644 --- a/native/sea/src/connection.rs +++ b/native/sea/src/connection.rs @@ -12,40 +12,155 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Opaque `Connection` wrapper. +//! Opaque `Connection` wrapper around the kernel's `Session`. //! -//! Round 1b: scaffold only. The kernel collapses ADBC's per-connection -//! state into the `Session` handle held by `Database` (see -//! `database.rs`). The JS-side `Connection` exists for API parity with -//! the existing Node driver but is currently a thin marker; Round 2 -//! decides whether to keep it as a pass-through on `Database` or to -//! attach per-connection scoping (e.g. default catalog/schema overrides). - -/// JS-visible connection options. Empty in Round 1b; Round 2 may add -/// per-connection scope fields (catalog, schema, session config map). +//! The kernel collapses ADBC's `Database` + `Connection` into a single +//! `Session`. We keep the wrapper name `Connection` on the JS side because +//! that matches the existing Node driver's mental model. +//! +//! M0 surface (Round 2): +//! - `Connection.executeStatement(sql, options)` — builds a kernel +//! `Statement`, sets the spec, awaits `execute()`, wraps the result +//! in a JS-visible `Statement` opaque handle. +//! - `Connection.close()` — explicit async close. Drop schedules a +//! fire-and-forget close on the captured runtime handle if explicit +//! close was never called. + +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::Mutex; + +use databricks_sql_kernel::Session; + +use crate::error::napi_err_from_kernel; +use crate::runtime; +use crate::statement::Statement; +use crate::util::guarded; + +/// JS-visible per-execute options. M0 only carries +/// initialCatalog / initialSchema / sessionConfig — parameters and +/// per-statement overrides land in M1. #[napi(object)] -pub struct ConnectionOptions {} +pub struct ExecuteOptions { + /// Default catalog applied to this statement via session conf. + pub initial_catalog: Option, + /// Default schema applied to this statement via session conf. + pub initial_schema: Option, + /// Per-statement session conf overrides (forwarded to SEA + /// `parameters` / Thrift `confOverlay`). + pub session_config: Option>, +} -/// Opaque connection handle. Round 1b: marker only; no kernel state. +/// Opaque connection handle wrapping a kernel `Session`. +/// +/// `inner` is `Arc>>` so: +/// - the Drop impl can clone the `Arc` and `.take()` the session on a +/// background tokio task without holding `&mut self` (which Drop is +/// forbidden from doing across an `await`), +/// - `executeStatement` can share immutable access to the session via +/// the `Arc` clones the kernel makes internally +/// (`Session::statement()` only needs `&self`). #[napi] -pub struct Connection {} +pub struct Connection { + pub(crate) inner: Arc>>, +} #[napi] impl Connection { - /// Construct a new connection handle. Round 1b is a no-op shell; - /// Round 2 will wire it to `Database`'s `Session` (likely via an - /// async `Database::connect()` factory rather than a JS-side - /// `new Connection()`). - #[napi(constructor)] - pub fn new(_options: ConnectionOptions) -> Self { - Connection {} + /// Execute a SQL statement and return a Statement handle that + /// streams batches via `fetchNextBatch()`. + #[napi] + pub async fn execute_statement( + &self, + sql: String, + options: ExecuteOptions, + ) -> napi::Result { + let inner = Arc::clone(&self.inner); + guarded(async move { + let guard = inner.lock().await; + let session = guard.as_ref().ok_or_else(|| { + napi::Error::new(napi::Status::InvalidArg, "connection already closed") + })?; + + // Build a per-statement spec on the kernel's mutable + // Statement. Session conf overrides surface through the + // statement_conf overlay; M0 has no parameter binding. + let mut stmt = session.statement(); + stmt.spec().sql(sql); + + let mut overlay: HashMap = + options.session_config.unwrap_or_default(); + if let Some(catalog) = options.initial_catalog { + overlay.insert("default_catalog".to_string(), catalog); + } + if let Some(schema) = options.initial_schema { + overlay.insert("default_schema".to_string(), schema); + } + if !overlay.is_empty() { + stmt.spec().statement_conf(overlay); + } + + let executed = stmt.execute().await.map_err(napi_err_from_kernel)?; + Ok(Statement::from_executed(executed)) + }) + .await + } + + /// Explicit close. Marks the connection wrapper as closed so + /// subsequent calls on this `Connection` return `InvalidArg`, then + /// schedules a fire-and-forget server-side close on the runtime. + /// + /// **Why fire-and-forget and not `Session::close().await`:** the + /// kernel's `Session::close(self).await` body holds a + /// `tracing::EnteredSpan` (a `!Send` type) across an `.await`, so + /// the future is not `Send`. napi-rs's `execute_tokio_future` glue + /// rejects non-`Send` futures, and `Handle::spawn` does too. The + /// kernel's `SessionInner::Drop` already spawns the + /// `delete_session` RPC on the same runtime handle the napi + /// binding captured, so dropping the value is functionally + /// equivalent — the difference is that JS callers can't observe a + /// `delete_session` failure from `close()`. Tracked as a kernel- + /// side follow-up (clone the span rather than entering it) in + /// Round 3 findings. + #[napi] + pub async fn close(&self) -> napi::Result<()> { + let inner = Arc::clone(&self.inner); + guarded(async move { + let _taken = { + let mut guard = inner.lock().await; + guard.take() + }; + // `_taken` drops here. Kernel's `SessionInner::Drop` + // spawns `delete_session` on its captured handle. + Ok(()) + }) + .await } } impl Drop for Connection { fn drop(&mut self) { - // Round 1b: nothing to clean up. Round 2 will populate this - // with the same `runtime::get_handle().spawn(...)` pattern as - // `Database::drop`. + // Fire-and-forget close on the captured runtime. If `close()` + // was already called, `inner` holds `None` and the spawned + // task is a trivial no-op. + let Some(handle) = runtime::try_get_handle() else { + // No async entry point ever ran — there's nothing to close. + return; + }; + let inner = Arc::clone(&self.inner); + handle.spawn(async move { + // Drop the session value on the runtime. The kernel's + // `SessionInner::Drop` already spawns a fire-and-forget + // `delete_session` against its own captured handle. We do + // NOT call `Session::close().await` here because that + // method holds a `tracing::EnteredSpan` (`!Send`) across + // its body, which would conflict with `Handle::spawn`'s + // `Send` bound on the future. + let _taken = { + let mut guard = inner.lock().await; + guard.take() + }; + // `_taken` drops here; kernel's SessionInner::Drop fires. + }); } } diff --git a/native/sea/src/database.rs b/native/sea/src/database.rs index 800ca090..7f86760e 100644 --- a/native/sea/src/database.rs +++ b/native/sea/src/database.rs @@ -12,88 +12,79 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Opaque `Database` wrapper around the kernel's `Session` handle. -//! -//! Round 1b: scaffold only — `constructor` stores options and returns -//! immediately. Round 2 will add `open()` (calling `Session::open`), -//! `statement()`, `close()`, etc. +//! `openSession()` — the binding's session-construction entry point. //! //! The kernel collapses ADBC's `Database` + `Connection` into a single -//! `Session`. We keep the wrapper name `Database` on the JS side -//! because that matches the existing Node driver's mental model; the -//! actual session lives inside this struct. +//! `Session`. The TS adapter layer reconstructs a `DBSQLClient` / +//! `Database` wrapper on top of this binding, so the napi surface itself +//! stays flat: one free function, one opaque `Connection` class. +//! +//! Rationale for a free function over a static class method: +//! - napi-rs v2's static-method codegen for async functions returning a +//! `#[napi]` struct is fragile — the runtime registration sometimes +//! omits the method from the class object. Free `#[napi]` functions +//! go through a different, more stable codegen path. +//! - There is no kernel-side `Database` state to wrap; everything +//! meaningful lives on `Session`. A wrapper class with no fields adds +//! a JS object allocation per session for no benefit. -use databricks_sql_kernel::Session; +use std::sync::Arc; +use tokio::sync::Mutex; +use databricks_sql_kernel::{AuthConfig, Session}; + +use crate::connection::Connection; +use crate::error::napi_err_from_kernel; use crate::runtime; +use crate::util::guarded; -/// JS-visible constructor options. Round 2 will populate this with -/// real fields (host, warehouseId, auth, …); for the scaffold it is -/// intentionally empty so the JS smoke test can call `new Database({})` -/// without TypeScript complaining about unknown properties. +/// JS-visible options for opening a Databricks SQL session over PAT. +/// +/// M0 supports PAT only — `token` is required. OAuth M2M / U2M variants +/// land in M1 along with a discriminated-union shape on the JS side. #[napi(object)] -pub struct DatabaseOptions { - /// Workspace host URL (e.g. `https://workspace.databricks.com`). - /// Optional in Round 1b; Round 2 makes it required. - pub host: Option, - /// Warehouse id. Optional in Round 1b; Round 2 makes it required. - pub warehouse_id: Option, +pub struct ConnectionOptions { + /// Workspace host, e.g. `adb-…azuredatabricks.net`. The kernel + /// normalises this — bare hostnames get `https://` prepended. + pub host_name: String, + /// JDBC-style HTTP path, e.g. `/sql/1.0/warehouses/abc123`. The + /// kernel parses out the warehouse id. + pub http_path: String, + /// Personal access token. Must be non-empty (the kernel rejects + /// empty PATs at session construction). + pub token: String, } -/// Opaque database handle on the JS side. +/// Open a Databricks SQL session over PAT auth and return an opaque +/// `Connection` wrapping the kernel `Session`. /// -/// Holds `Option` so `close()` (Round 2) can `.take()` the -/// session out and `.await` an async close, leaving `inner = None`. -/// The `Drop` impl checks `inner` to decide whether to schedule a -/// fire-and-forget close on the captured tokio runtime. +/// The JS-visible name is `openSession` (napi-rs converts snake_case +/// to camelCase for free functions). #[napi] -pub struct Database { - // TODO(round-2): populate this from `Session::open(config).await` - // inside an `open()` async method (or directly inside the - // constructor via a factory pattern). For now it stays `None` so - // Drop has nothing to clean up. - inner: Option, -} - -#[napi] -impl Database { - /// Construct a new database handle. Round 1b: the options are - /// stashed for diagnostic purposes only — no network call. - #[napi(constructor)] - pub fn new(_options: DatabaseOptions) -> Self { - Database { inner: None } - } -} +pub async fn open_session(options: ConnectionOptions) -> napi::Result { + guarded(async move { + // Cache the napi-rs tokio Handle on the very first async call + // so Drop impls (which run on the V8 GC thread, outside any + // tokio context) can still `spawn` cleanup tasks onto the + // runtime that's driving this future. + let _ = runtime::get_handle(); -impl Drop for Database { - fn drop(&mut self) { - // Pattern #5 from the napi-rs patterns doc: spawn cleanup on - // the captured runtime handle. We only enter this branch if - // the JS user dropped the handle without calling `close()` - // first (which Round 2 will provide). For Round 1b there is - // nothing to clean up, but the pattern is in place so the - // Round-2 work is a one-line addition. - let Some(session) = self.inner.take() else { - return; - }; - let Some(handle) = runtime::try_get_handle() else { - // No async entry point has ever run, so there cannot be a - // live `Session` either — but the destructor of `Session` - // itself uses the kernel's own borrowed handle, so we - // simply let it run. - drop(session); - return; - }; - // The kernel's `SessionInner::Drop` already spawns a - // fire-and-forget `delete_session` on its own captured runtime - // handle. To stay on napi-rs's runtime explicitly (so Round 2 - // can add binding-side cleanup steps before the kernel drop), - // hop onto a tokio task and let the kernel destructor run - // there. We do NOT call `Session::close().await` because that - // method enters a tracing span (`EnteredSpan` is `!Send`) and - // therefore cannot cross an `await` boundary inside a `spawn`. - handle.spawn(async move { - drop(session); - }); - } + // SessionConfig is `#[non_exhaustive]` — go through the + // builder, which is the only public path that constructs it. + // `http_path()` is the convenience setter that maps a bare + // hostname + `/sql/1.0/warehouses/{id}` path into the kernel's + // `ConnectionConfig`. + let session = Session::builder() + .http_path(options.host_name, options.http_path) + .auth(AuthConfig::Pat { + token: options.token, + }) + .open() + .await + .map_err(napi_err_from_kernel)?; + Ok(Connection { + inner: Arc::new(Mutex::new(Some(session))), + }) + }) + .await } diff --git a/native/sea/src/error.rs b/native/sea/src/error.rs index fc82a0b4..d06e1600 100644 --- a/native/sea/src/error.rs +++ b/native/sea/src/error.rs @@ -12,34 +12,142 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Minimal kernel-error → `napi::Error` mapping. +//! Kernel-error → `napi::Error` mapping. //! -//! Round 1b: just preserves the kernel error message and translates -//! the kernel's [`ErrorCode`] into a small set of napi statuses. Round -//! 2 will add a full taxonomy (sqlState, vendorCode, retryable, …) -//! attached as own-properties on the JS error object via -//! `Env::create_error` (pattern #7 in the napi-rs patterns doc). +//! The kernel returns a richly-typed [`Error`](databricks_sql_kernel::Error) +//! with `code`, `sql_state`, `error_code`, `vendor_code`, `http_status`, +//! `retryable`, and `query_id` fields. The napi `Error` type only +//! carries `status` + `reason` directly — to attach the extra fields +//! as own-properties on the JS error object we'd need an `Env` +//! reference, which `#[napi] async fn` bodies don't have access to +//! cheaply. +//! +//! Compromise (one helper, DRY): encode the structured metadata into +//! the `reason` field as a JSON envelope prefixed with a sentinel +//! `__databricks_error__:` token. The TS adapter detects the sentinel, +//! parses the payload, and reconstructs the typed error class +//! (`DBSQLError`, `AuthError`, …). Plain-string errors from the +//! binding's own code paths fall through the sentinel detection +//! unchanged. +//! +//! Round 3 may switch to the `Env::create_error` + own-properties +//! pattern once we have a stable point in each entry where `env: Env` +//! is available (likely by wrapping the async glue in a sync entry +//! point that calls `tokio::spawn` after capturing `env`). use databricks_sql_kernel::{Error as KernelError, ErrorCode}; use napi::{Error as NapiError, Status}; -/// Map a kernel `Error` into a `napi::Error`. The kernel `ErrorCode` -/// is used to pick a sensible napi `Status`; the kernel message is -/// preserved verbatim as the error reason. -/// -/// Round 1b has no callers — the scaffold doesn't return any kernel -/// errors yet. Round 2's `Database::open()` is the first consumer. -#[allow(dead_code)] +/// Sentinel that tells the TS adapter the `reason` string is a JSON +/// envelope rather than a plain message. Has to be ASCII-only so it +/// survives any `String` round-trip the napi layer might do. +pub(crate) const ERROR_SENTINEL: &str = "__databricks_error__:"; + +/// Map a kernel [`Error`] into a `napi::Error`. Preserves the kernel +/// `ErrorCode` (mapped to the closest napi `Status`), and stuffs the +/// remaining structured fields into a JSON envelope on the reason so +/// the TS layer can reconstruct the typed error class. pub(crate) fn napi_err_from_kernel(e: KernelError) -> NapiError { - let status = match e.code { - ErrorCode::InvalidArgument | ErrorCode::InvalidStatementHandle => { - Status::InvalidArg - } + let status = status_from_kernel_code(e.code); + + // Build a minimal JSON envelope. We hand-build it (no serde_json + // dep) — the field set is small and fixed, and avoiding serde + // keeps the crate dep graph trim. + let mut envelope = String::with_capacity(e.message.len() + 128); + envelope.push_str(ERROR_SENTINEL); + envelope.push('{'); + push_json_str_field(&mut envelope, "code", error_code_str(e.code)); + envelope.push(','); + push_json_str_field(&mut envelope, "message", &e.message); + if let Some(s) = &e.sql_state { + envelope.push(','); + push_json_str_field(&mut envelope, "sqlState", s); + } + if let Some(ec) = &e.error_code { + envelope.push(','); + push_json_str_field(&mut envelope, "errorCode", ec); + } + if let Some(vc) = e.vendor_code { + envelope.push(','); + envelope.push_str("\"vendorCode\":"); + envelope.push_str(&vc.to_string()); + } + if let Some(hs) = e.http_status { + envelope.push(','); + envelope.push_str("\"httpStatus\":"); + envelope.push_str(&hs.to_string()); + } + if e.retryable { + envelope.push_str(",\"retryable\":true"); + } + if let Some(qid) = &e.query_id { + envelope.push(','); + push_json_str_field(&mut envelope, "queryId", qid); + } + envelope.push('}'); + + NapiError::new(status, envelope) +} + +/// Map kernel `ErrorCode` → napi `Status`. The status is mostly +/// cosmetic on the napi side (the TS layer dispatches on `code` from +/// the envelope); we pick the closest match so unwrapped errors still +/// look reasonable in raw napi consumers. +fn status_from_kernel_code(code: ErrorCode) -> Status { + match code { + ErrorCode::InvalidArgument | ErrorCode::InvalidStatementHandle => Status::InvalidArg, ErrorCode::Cancelled => Status::Cancelled, - // Everything else collapses to `GenericFailure`; Round 2 - // refines this with sqlState / vendorCode / category own- - // properties on a JS error object. _ => Status::GenericFailure, - }; - NapiError::new(status, e.message) + } +} + +/// String tag for each kernel `ErrorCode` — stable across kernel +/// versions because v0's `ErrorCode` is `#[non_exhaustive]` and we +/// pattern-match exhaustively against the known set. +fn error_code_str(code: ErrorCode) -> &'static str { + match code { + ErrorCode::InvalidArgument => "InvalidArgument", + ErrorCode::Unauthenticated => "Unauthenticated", + ErrorCode::PermissionDenied => "PermissionDenied", + ErrorCode::NotFound => "NotFound", + ErrorCode::ResourceExhausted => "ResourceExhausted", + ErrorCode::Unavailable => "Unavailable", + ErrorCode::Timeout => "Timeout", + ErrorCode::Cancelled => "Cancelled", + ErrorCode::DataLoss => "DataLoss", + ErrorCode::Internal => "Internal", + ErrorCode::InvalidStatementHandle => "InvalidStatementHandle", + ErrorCode::NetworkError => "NetworkError", + ErrorCode::SqlError => "SqlError", + // Forward-compat: ErrorCode is `#[non_exhaustive]`. Any new + // variant the kernel adds in v0.x lands here until we mirror + // it in this match. The TS layer treats Unknown as a generic + // failure. + _ => "Unknown", + } +} + +/// Append `"key":"value"` to the JSON buffer, escaping the value's +/// `"` and `\` characters and control chars to keep the envelope +/// JSON-parseable. The narrow set of escapes is sufficient for the +/// human-readable error messages the kernel produces (no embedded +/// binary blobs, no Unicode surrogate pairs). +fn push_json_str_field(out: &mut String, key: &str, value: &str) { + out.push('"'); + out.push_str(key); + out.push_str("\":\""); + for ch in value.chars() { + match ch { + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + c if (c as u32) < 0x20 => { + out.push_str(&format!("\\u{:04x}", c as u32)); + } + c => out.push(c), + } + } + out.push('"'); } diff --git a/native/sea/src/lib.rs b/native/sea/src/lib.rs index 7d76cf9b..6de102ea 100644 --- a/native/sea/src/lib.rs +++ b/native/sea/src/lib.rs @@ -15,9 +15,10 @@ //! `databricks-sea-native` — napi-rs binding crate for the Databricks //! SQL Node.js driver's SEA (Statement Execution API) path. //! -//! Round 1b scaffold: module skeletons + a single working `version()` -//! `#[napi]` function that proves the binding loads end-to-end. Round 2 -//! adds `Database::open` / `Statement::execute` / fetch / cancel. +//! Round 2 surface: `Database.open` → `Connection.execute_statement` +//! → `Statement.fetch_next_batch` / `schema` / `cancel` / `close`. +//! Results cross the FFI as Arrow IPC bytes (see `result.rs`); the +//! TS adapter decodes them via `apache-arrow`. #![deny(unsafe_op_in_unsafe_fn)] @@ -31,12 +32,12 @@ pub(crate) mod logger; pub(crate) mod result; pub(crate) mod runtime; pub(crate) mod statement; +pub(crate) mod util; /// Returns the native binding's crate version (`CARGO_PKG_VERSION`). /// -/// Acts as the round-1b smoke test: a JS `require()` of the `.node` -/// artifact that successfully calls `version()` proves the binding's -/// build + load + dispatch path is wired correctly. +/// Originally the round-1b smoke test; kept as a cheap "is the binding +/// loaded?" probe for the JS-side loader's structured diagnostics. #[napi] pub fn version() -> String { env!("CARGO_PKG_VERSION").to_string() diff --git a/native/sea/src/result.rs b/native/sea/src/result.rs index f406c363..488c0851 100644 --- a/native/sea/src/result.rs +++ b/native/sea/src/result.rs @@ -12,7 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! ResultStream wrapper. +//! Arrow IPC payload types crossed across the napi boundary. //! -//! Round 2 work. Empty in Round 1b — see `statement.rs` for the same -//! reasoning. +//! Per sea-design.md Layer 2: "The binding ships the batch across the +//! FFI as Arrow IPC bytes. The adapter converts those bytes into +//! JavaScript rows…" — so the napi boundary is intentionally narrow: +//! one envelope per batch, one envelope per schema. + +use napi::bindgen_prelude::Buffer; + +/// A single Arrow IPC stream payload encoding one record batch (plus +/// the schema header so the JS-side reader is stateless). +#[napi(object)] +pub struct ArrowBatch { + pub ipc_bytes: Buffer, +} + +/// An Arrow IPC stream payload encoding just the result schema (no +/// record-batch messages). Returned by `Statement.schema()`. +#[napi(object)] +pub struct ArrowSchema { + pub ipc_bytes: Buffer, +} diff --git a/native/sea/src/statement.rs b/native/sea/src/statement.rs index c449b402..6d7b8761 100644 --- a/native/sea/src/statement.rs +++ b/native/sea/src/statement.rs @@ -12,9 +12,201 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Statement / ExecutedStatement wrappers. +//! Opaque `Statement` wrapper around the kernel's `ExecutedStatement`. //! -//! Round 2 work. This module is intentionally empty in Round 1b — no -//! `#[napi]` types here yet. Adding empty stubs would require -//! `napi-rs` to generate JS bindings for them, which adds noise to the -//! `index.d.ts` without any callable surface. +//! M0 surface (Round 2): +//! - `Statement.fetchNextBatch() -> Option` — drives +//! `ResultStream::next_batch().await`, serialises the borrowed +//! `RecordBatch` to Arrow IPC bytes, returns them to JS. +//! - `Statement.schema() -> ArrowSchema` — returns the cached schema +//! from the kernel side, serialised as a schema-only IPC payload. +//! - `Statement.cancel()` / `Statement.close()` — forwards to +//! `ExecutedStatement::cancel/close` via the +//! `ExecutedStatementHandle` trait. Drop fires-and-forgets close +//! if not already explicitly closed. + +use std::sync::Arc; +use tokio::sync::Mutex; + +use arrow_ipc::writer::StreamWriter; +use databricks_sql_kernel::{ExecutedStatement, ExecutedStatementHandle, ResultBatch}; + +use crate::error::napi_err_from_kernel; +use crate::result::{ArrowBatch, ArrowSchema}; +use crate::runtime; +use crate::util::guarded; + +/// Opaque executed-statement handle. +/// +/// `inner` is wrapped in `Arc>>` so: +/// - `fetch_next_batch` can `await` `ResultStream::next_batch` which +/// requires `&mut ExecutedStatement` (via `result_stream_mut`), +/// - `cancel` / `close` (which take `&self` on the kernel side via the +/// `ExecutedStatementHandle` trait) can run concurrently with each +/// other from a JS perspective without panicking, +/// - `Drop` can hand the inner handle off to a tokio task without +/// touching `&mut self` across an `await`. +#[napi] +pub struct Statement { + inner: Arc>>, +} + +impl Statement { + /// Crate-internal constructor — called from + /// `Connection::execute_statement` once the kernel hands back the + /// `ExecutedStatement`. + pub(crate) fn from_executed(executed: ExecutedStatement) -> Self { + Self { + inner: Arc::new(Mutex::new(Some(executed))), + } + } +} + +#[napi] +impl Statement { + /// Pull the next batch of results. Returns `None` when the stream + /// is exhausted. The returned `ArrowBatch.ipcBytes` is a complete + /// Arrow IPC stream (schema header + 1 record-batch message) + /// suitable for handing to `apache-arrow`'s `RecordBatchReader`. + #[napi] + pub async fn fetch_next_batch(&self) -> napi::Result> { + let inner = Arc::clone(&self.inner); + guarded(async move { + let mut guard = inner.lock().await; + let executed = guard.as_mut().ok_or_else(|| { + napi::Error::new(napi::Status::InvalidArg, "statement already closed") + })?; + + let stream = executed.result_stream_mut(); + // Capture the schema before borrowing the next batch — we + // include the schema header in every IPC payload so the + // JS-side consumer can decode each batch independently + // without carrying state across calls. + let schema = stream.schema(); + let maybe_batch = stream.next_batch().await.map_err(napi_err_from_kernel)?; + let Some(batch) = maybe_batch else { + return Ok(None); + }; + // `ResultBatch` is `#[non_exhaustive]`; v0 only ever + // yields `Arrow`. The error arm exists for forward + // compat — v1+ may add ColumnarThrift / JsonRows / etc., + // and we want the binding to surface that as a typed + // error rather than silently misbehaving. + let record_batch = match batch { + ResultBatch::Arrow(rb) => rb, + _ => { + return Err(napi::Error::new( + napi::Status::GenericFailure, + "non-Arrow ResultBatch variant — binding needs upgrade", + )); + } + }; + let bytes = encode_ipc_stream(&schema, Some(record_batch))?; + Ok(Some(ArrowBatch { + ipc_bytes: bytes.into(), + })) + }) + .await + } + + /// Result schema as an Arrow IPC payload (schema header only, no + /// record-batch message). Available before any batches have been + /// fetched. + #[napi] + pub async fn schema(&self) -> napi::Result { + let inner = Arc::clone(&self.inner); + guarded(async move { + let guard = inner.lock().await; + let executed = guard.as_ref().ok_or_else(|| { + napi::Error::new(napi::Status::InvalidArg, "statement already closed") + })?; + let schema = executed.schema(); + let bytes = encode_ipc_stream(&schema, None)?; + Ok(ArrowSchema { + ipc_bytes: bytes.into(), + }) + }) + .await + } + + /// Server-side cancel. No-op if already finished. + #[napi] + pub async fn cancel(&self) -> napi::Result<()> { + let inner = Arc::clone(&self.inner); + guarded(async move { + let guard = inner.lock().await; + let executed = guard.as_ref().ok_or_else(|| { + napi::Error::new(napi::Status::InvalidArg, "statement already closed") + })?; + executed.cancel().await.map_err(napi_err_from_kernel) + }) + .await + } + + /// Explicit close. Awaits the server-side close so the JS caller + /// can observe failures. + #[napi] + pub async fn close(&self) -> napi::Result<()> { + let inner = Arc::clone(&self.inner); + guarded(async move { + // Take the handle out so `Drop` knows there's nothing left + // to clean up. + let executed = { + let mut guard = inner.lock().await; + guard.take() + }; + if let Some(executed) = executed { + executed.close().await.map_err(napi_err_from_kernel)?; + } + Ok(()) + }) + .await + } +} + +impl Drop for Statement { + fn drop(&mut self) { + let Some(handle) = runtime::try_get_handle() else { + return; + }; + let inner = Arc::clone(&self.inner); + handle.spawn(async move { + // Drop the executed statement on the runtime. The kernel's + // `ExecutedStatement::Drop` already spawns a fire-and-forget + // `close_statement` against its own captured handle, so we + // just need to ensure the value is dropped inside a tokio + // context (the kernel's Drop reads `runtime_handle.clone()` + // and spawns; that handle is the same one we captured here). + let _taken = { + let mut guard = inner.lock().await; + guard.take() + }; + }); + } +} + +/// Encode an Arrow schema (and optional one record batch) as an IPC +/// stream payload. Used for both `schema()` (schema only) and +/// `fetchNextBatch()` (schema + one batch). Returning a self-contained +/// IPC stream per call is wasteful header-wise but lets the JS adapter +/// stay stateless — it decodes each `ipcBytes` independently via the +/// same `apache-arrow` `RecordBatchReader` path. +fn encode_ipc_stream( + schema: &arrow_schema::SchemaRef, + batch: Option<&arrow_array::RecordBatch>, +) -> napi::Result> { + let mut buf: Vec = Vec::new(); + { + let mut writer = StreamWriter::try_new(&mut buf, schema) + .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?; + if let Some(rb) = batch { + writer + .write(rb) + .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?; + } + writer + .finish() + .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?; + } + Ok(buf) +} diff --git a/native/sea/src/util.rs b/native/sea/src/util.rs new file mode 100644 index 00000000..4ba7e346 --- /dev/null +++ b/native/sea/src/util.rs @@ -0,0 +1,62 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Shared helpers — one place for the `catch_unwind` wrapping that +//! every async entry point goes through (pattern #8 in the napi-rs +//! patterns doc). One helper, called once per entry point — DRY. +//! +//! Why a helper rather than a macro: helper + `async move {}` reads +//! better at call sites and keeps the stack trace shallow when a panic +//! actually fires (a macro would expand into the caller's body). + +use std::any::Any; +use std::future::Future; +use std::panic::AssertUnwindSafe; + +use futures::FutureExt; +use napi::{Error as NapiError, Result as NapiResult, Status}; + +/// Run `fut` and convert any panic the future raises into a +/// `napi::Error` so the JS caller sees a rejected promise instead of +/// the Node process aborting. +/// +/// `catch_unwind` does not catch `std::process::abort`, double-panic, +/// or allocator OOM — those still bring down the process. That's by +/// design: a corrupted process state isn't something we can pretend to +/// recover from. +pub(crate) async fn guarded(fut: F) -> NapiResult +where + F: Future>, +{ + match AssertUnwindSafe(fut).catch_unwind().await { + Ok(res) => res, + Err(panic) => Err(NapiError::new( + Status::GenericFailure, + format!("panic in native binding: {}", panic_payload_msg(panic)), + )), + } +} + +/// Best-effort downcast of a panic payload to a human-readable string. +/// `panic!("…")` produces `&'static str` or `String`; the rest fall +/// through to a generic marker so the JS caller still sees *something*. +fn panic_payload_msg(p: Box) -> String { + if let Some(s) = p.downcast_ref::<&'static str>() { + return (*s).to_string(); + } + if let Some(s) = p.downcast_ref::() { + return s.clone(); + } + "non-string panic payload".to_string() +} diff --git a/tests/native/e2e-smoke.test.ts b/tests/native/e2e-smoke.test.ts new file mode 100644 index 00000000..8ab6d22f --- /dev/null +++ b/tests/native/e2e-smoke.test.ts @@ -0,0 +1,106 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { getSeaNative } from '../../lib/sea/SeaNativeLoader'; + +// Round 2 end-to-end smoke test: +// 1. Open a kernel `Session` via `Database.open(...)` over PAT. +// 2. Execute `SELECT 1`. +// 3. Fetch the first batch — assert the IPC bytes are non-empty. +// 4. Close the statement, then the connection. +// +// Requires three env vars (exported by the developer's shell): +// - DATABRICKS_PECOTESTING_SERVER_HOSTNAME +// - DATABRICKS_PECOTESTING_HTTP_PATH +// - DATABRICKS_PECOTESTING_TOKEN_PERSONAL +// If any is missing, the test is skipped (so CI can keep the file in +// the suite without flapping when secrets aren't provisioned). + +interface NativeBinding { + openSession(opts: { + hostName: string; + httpPath: string; + token: string; + }): Promise; +} + +interface NativeConnection { + executeStatement( + sql: string, + options: { + initialCatalog?: string; + initialSchema?: string; + sessionConfig?: Record; + }, + ): Promise; + close(): Promise; +} + +interface NativeStatement { + fetchNextBatch(): Promise<{ ipcBytes: Buffer } | null>; + schema(): Promise<{ ipcBytes: Buffer }>; + cancel(): Promise; + close(): Promise; +} + +describe('SEA native binding — Round 2 end-to-end smoke test', function smoke() { + const hostName = process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME; + const httpPath = process.env.DATABRICKS_PECOTESTING_HTTP_PATH; + const token = process.env.DATABRICKS_PECOTESTING_TOKEN_PERSONAL; + + // Live-warehouse tests can take >2s through warm-up, so bump the + // mocha default (2000ms) generously. + this.timeout(60_000); + + before(function gate() { + if (!hostName || !httpPath || !token) { + // Use `this.skip()` so the suite is reported as skipped rather + // than failing on dev machines without the secrets. + // eslint-disable-next-line no-invalid-this + this.skip(); + } + }); + + it('opens a session, runs SELECT 1, and reads the first batch', async () => { + const binding = getSeaNative() as unknown as NativeBinding; + + const connection = await binding.openSession({ + hostName: hostName as string, + httpPath: httpPath as string, + token: token as string, + }); + expect(connection).to.be.an('object'); + + let statement: NativeStatement | null = null; + try { + statement = await connection.executeStatement('SELECT 1', {}); + expect(statement).to.be.an('object'); + + const batch = await statement.fetchNextBatch(); + expect(batch).to.not.equal(null); + expect(batch!.ipcBytes).to.be.instanceOf(Buffer); + expect(batch!.ipcBytes.length).to.be.greaterThan(0); + + // Draining: subsequent fetch should return null (one-row result). + const after = await statement.fetchNextBatch(); + expect(after).to.equal(null); + } finally { + if (statement !== null) { + await statement.close(); + } + await connection.close(); + } + }); +}); diff --git a/tests/native/version.test.ts b/tests/native/version.test.ts index 03210c3c..72a69f43 100644 --- a/tests/native/version.test.ts +++ b/tests/native/version.test.ts @@ -18,23 +18,21 @@ import { version, getSeaNative } from '../../lib/sea/SeaNativeLoader'; describe('SEA native binding — smoke test', () => { it('loads the .node artifact and returns version()', () => { const v = version(); - // Round 1b: the native crate is at 0.1.0. Match the shape rather - // than the literal so the test does not need updating on every - // version bump. expect(v).to.match(/^\d+\.\d+\.\d+$/); }); - it('exposes the Database opaque class', () => { - const binding = getSeaNative() as unknown as { Database: new (opts: object) => object }; - expect(typeof binding.Database).to.equal('function'); - const db = new binding.Database({}); - expect(db).to.be.an('object'); + it('exposes the openSession factory function', () => { + const binding = getSeaNative() as unknown as { openSession: Function }; + expect(typeof binding.openSession).to.equal('function'); }); it('exposes the Connection opaque class', () => { - const binding = getSeaNative() as unknown as { Connection: new (opts: object) => object }; + const binding = getSeaNative() as unknown as { Connection: Function }; expect(typeof binding.Connection).to.equal('function'); - const conn = new binding.Connection({}); - expect(conn).to.be.an('object'); + }); + + it('exposes the Statement opaque class', () => { + const binding = getSeaNative() as unknown as { Statement: Function }; + expect(typeof binding.Statement).to.equal('function'); }); }); From 983bd1905c0277819442c6e97f53050b19cebbe9 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 01:26:17 +0000 Subject: [PATCH 04/11] =?UTF-8?q?sea-napi-binding:=20cleanup=20=E2=80=94?= =?UTF-8?q?=20drop=20unused=20tracing=20deps;=20address=20bloat=20findings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 1 scaffold declared tracing + tracing-subscriber as deps but never used them. Removed. Logger bridge will re-add in round 3. Other findings from 6b3affd-2026-05-15.md reviewed: - Finding 2 (Database::Drop unreachable in Round 1b) — obsoleted by Round 2 (40d0b57): database.rs no longer declares a Database struct or Drop impl; it is now an `open_session` free function. - Finding 3 (empty Connection::Drop) — obsoleted by Round 2: the Drop impl now spawns a real fire-and-forget close on the captured tokio handle. Co-authored-by: Isaac --- native/sea/Cargo.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/native/sea/Cargo.toml b/native/sea/Cargo.toml index c69fb93a..c001e04b 100644 --- a/native/sea/Cargo.toml +++ b/native/sea/Cargo.toml @@ -42,11 +42,6 @@ tokio = { version = "1", default-features = false, features = ["rt", "sync"] } # Lazy `OnceCell` for the captured tokio Handle. once_cell = "1" -# Tracing for kernel + binding diagnostics. The real subscriber is wired -# in Round 3 via the ThreadsafeFunction logger bridge. -tracing = "0.1" -tracing-subscriber = { version = "0.3", default-features = false, features = ["fmt"] } - # `catch_unwind` wrapper around async futures (pattern #8 of the # napi-rs patterns doc). Transitively a dep of the kernel already, but # declared here so we can `use FutureExt;` directly. From c894742c6f78e8c85af21555622e48f4192f8ca1 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 01:39:43 +0000 Subject: [PATCH 05/11] =?UTF-8?q?sea-auth:=20PAT=20auth=20flow=20through?= =?UTF-8?q?=20SeaBackend=20=E2=86=92=20napi=20binding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SeaBackend.connect() now wires PAT options to the napi binding's openSession(). Non-PAT modes rejected with clear M0-scope error (OAuth/Azure/Federation land in M1). E2E test against pecotesting confirms PAT round-trips: connect → openSession → close all clean. No new dependencies. SeaAuth helper is ~30 LOC. --- lib/sea/SeaAuth.ts | 83 +++++++ lib/sea/SeaBackend.ts | 163 ++++++++++++- tests/integration/.mocharc.js | 11 + tests/integration/sea/auth-pat-e2e.test.ts | 75 ++++++ tests/unit/sea/auth-pat.test.ts | 263 +++++++++++++++++++++ 5 files changed, 589 insertions(+), 6 deletions(-) create mode 100644 lib/sea/SeaAuth.ts create mode 100644 tests/integration/.mocharc.js create mode 100644 tests/integration/sea/auth-pat-e2e.test.ts create mode 100644 tests/unit/sea/auth-pat.test.ts diff --git a/lib/sea/SeaAuth.ts b/lib/sea/SeaAuth.ts new file mode 100644 index 00000000..cf16c80f --- /dev/null +++ b/lib/sea/SeaAuth.ts @@ -0,0 +1,83 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { ConnectionOptions } from '../contracts/IDBSQLClient'; +import AuthenticationError from '../errors/AuthenticationError'; +import HiveDriverError from '../errors/HiveDriverError'; + +/** + * Shape consumed by the napi-binding's `openSession()` (see + * `native/sea/index.d.ts`). M0 supports PAT only — `token` is required. + * + * Mirrors `ConnectionOptions` in the binding's `.d.ts`; declared locally + * to avoid coupling the JS-side adapter to the auto-generated TS file. + */ +export interface SeaNativeConnectionOptions { + hostName: string; + httpPath: string; + token: string; +} + +function prependSlash(str: string): string { + if (str.length > 0 && str.charAt(0) !== '/') { + return `/${str}`; + } + return str; +} + +/** + * Validate that the user-supplied `ConnectionOptions` describe a PAT auth + * configuration and build the napi-binding's connection-options shape. + * + * M0 SCOPE: PAT only. + * - Accepts `authType: 'access-token'` and the undefined-authType default + * (which already means PAT throughout the existing driver — see + * `DBSQLClient.createAuthProvider`). + * - Rejects every other `authType` discriminant with a clear + * "M0 supports only PAT" message so callers know OAuth / Federation / + * custom providers land in M1. + * + * Throws: + * - `AuthenticationError` when the auth mode is PAT but `token` is missing + * or empty. + * - `HiveDriverError` when the auth mode is anything other than PAT. + */ +export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNativeConnectionOptions { + const { authType } = options as { authType?: string }; + + if (authType !== undefined && authType !== 'access-token') { + throw new HiveDriverError( + `SEA backend (M0) supports only PAT auth (authType: 'access-token'); ` + + `got authType: '${authType}'. Other auth modes (databricks-oauth, ` + + `token-provider, external-token, static-token, custom) will land in M1.`, + ); + } + + // PAT path — at this point `options` is structurally the access-token branch + // of `AuthOptions`, which guarantees a `token` field at the type level. We + // still defensively re-check because the public ConnectionOptions type + // permits `authType: undefined` with no token at runtime. + const { token } = options as { token?: string }; + if (typeof token !== 'string' || token.length === 0) { + throw new AuthenticationError( + 'SEA backend: a non-empty PAT must be supplied via `token` when using `authType: \'access-token\'`.', + ); + } + + return { + hostName: options.host, + httpPath: prependSlash(options.path), + token, + }; +} diff --git a/lib/sea/SeaBackend.ts b/lib/sea/SeaBackend.ts index 5815dc05..ee20a1ba 100644 --- a/lib/sea/SeaBackend.ts +++ b/lib/sea/SeaBackend.ts @@ -1,18 +1,169 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import IBackend from '../contracts/IBackend'; import ISessionBackend from '../contracts/ISessionBackend'; +import IOperationBackend from '../contracts/IOperationBackend'; +import { ConnectionOptions, OpenSessionRequest } from '../contracts/IDBSQLClient'; +import { + ExecuteStatementOptions, + TypeInfoRequest, + CatalogsRequest, + SchemasRequest, + TablesRequest, + TableTypesRequest, + ColumnsRequest, + FunctionsRequest, + PrimaryKeysRequest, + CrossReferenceRequest, +} from '../contracts/IDBSQLSession'; +import Status from '../dto/Status'; +import InfoValue from '../dto/InfoValue'; +import HiveDriverError from '../errors/HiveDriverError'; +import { getSeaNative, SeaNativeBinding } from './SeaNativeLoader'; +import { buildSeaConnectionOptions, SeaNativeConnectionOptions } from './SeaAuth'; + +const NOT_IMPLEMENTED_SESSION = + 'SEA session backend: method not implemented in sea-auth (M0); lands in sea-execution/sea-operation.'; + +/** + * Opaque handle to the napi binding's `Connection` class. The exact + * shape lives in `native/sea/index.d.ts` (auto-generated). We type it as + * a structural minimum here so the loader's pass-through typing doesn't + * leak into every call site. + */ +interface NativeConnection { + close(): Promise; +} + +/** + * Minimal `ISessionBackend` that wraps the napi-binding's `Connection`. + * + * For M0 (sea-auth) only `id` and `close()` are functional — they're the + * subset required to round-trip a connect-open-close cycle. Every other + * method throws a clear "not implemented in M0" `HiveDriverError`. + * + * The `id` field is currently a synthetic counter-based string; the kernel + * exposes a real session-id through a follow-on getter that + * `sea-execution` will wire through. + */ +export class SeaSessionBackend implements ISessionBackend { + private static seq = 0; -const NOT_IMPLEMENTED = 'SEA backend not implemented yet — wired in sea-napi-binding feature'; + public readonly id: string; + + private readonly connection: NativeConnection; + + constructor(connection: NativeConnection) { + this.connection = connection; + SeaSessionBackend.seq += 1; + this.id = `sea-session-${SeaSessionBackend.seq}`; + } + + /* eslint-disable @typescript-eslint/no-unused-vars */ + public async getInfo(_infoType: number): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + public async executeStatement( + _statement: string, + _options: ExecuteStatementOptions, + ): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getTypeInfo(_request: TypeInfoRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getCatalogs(_request: CatalogsRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getSchemas(_request: SchemasRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getTables(_request: TablesRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getTableTypes(_request: TableTypesRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getColumns(_request: ColumnsRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getFunctions(_request: FunctionsRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getPrimaryKeys(_request: PrimaryKeysRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + + public async getCrossReference(_request: CrossReferenceRequest): Promise { + throw new HiveDriverError(NOT_IMPLEMENTED_SESSION); + } + /* eslint-enable @typescript-eslint/no-unused-vars */ + + public async close(): Promise { + await this.connection.close(); + return Status.success(); + } +} + +/** + * M0 SeaBackend — wires PAT auth + napi `openSession` end-to-end. + * + * Connect is a no-op at this layer (the napi binding has no notion of a + * standalone "connect"; a session is opened directly). We capture the + * validated PAT options and hand them to `openSession()` on demand. + * + * Subsequent milestones (`sea-execution`, `sea-operation`) replace the + * stubbed `ISessionBackend` / `IOperationBackend` methods with real + * napi-binding calls. + */ export default class SeaBackend implements IBackend { - public async connect(): Promise { - throw new Error(NOT_IMPLEMENTED); + private nativeOptions?: SeaNativeConnectionOptions; + + private readonly native: SeaNativeBinding; + + constructor(native: SeaNativeBinding = getSeaNative()) { + this.native = native; + } + + public async connect(options: ConnectionOptions): Promise { + // Validate PAT auth + capture the napi-binding option shape. + // Any non-PAT mode (or a missing token) throws here, before we ever + // touch the native binding. + this.nativeOptions = buildSeaConnectionOptions(options); } - public async openSession(): Promise { - throw new Error(NOT_IMPLEMENTED); + // eslint-disable-next-line @typescript-eslint/no-unused-vars + public async openSession(_request: OpenSessionRequest): Promise { + if (!this.nativeOptions) { + throw new HiveDriverError('SeaBackend: connect() must be called before openSession().'); + } + const connection = (await this.native.openSession(this.nativeOptions)) as NativeConnection; + return new SeaSessionBackend(connection); } public async close(): Promise { - throw new Error(NOT_IMPLEMENTED); + // Connection-level resources are owned by the session wrapper. No-op here. + this.nativeOptions = undefined; } } diff --git a/tests/integration/.mocharc.js b/tests/integration/.mocharc.js new file mode 100644 index 00000000..f7113140 --- /dev/null +++ b/tests/integration/.mocharc.js @@ -0,0 +1,11 @@ +'use strict'; + +const allSpecs = 'tests/integration/**/*.test.ts'; + +const argvSpecs = process.argv.slice(4); + +module.exports = { + spec: argvSpecs.length > 0 ? argvSpecs : allSpecs, + timeout: '300000', + require: ['ts-node/register'], +}; diff --git a/tests/integration/sea/auth-pat-e2e.test.ts b/tests/integration/sea/auth-pat-e2e.test.ts new file mode 100644 index 00000000..8bff9748 --- /dev/null +++ b/tests/integration/sea/auth-pat-e2e.test.ts @@ -0,0 +1,75 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { DBSQLClient } from '../../../lib'; + +/** + * sea-auth M0 end-to-end: + * 1. Construct a DBSQLClient. + * 2. `connect({ useSEA: true, token })` against pecotesting. + * 3. `openSession()` — round-trips through the napi binding. + * 4. Close the session, then the client. + * + * No query is executed here — execution is the responsibility of the + * sea-execution feature's own e2e. This test exists solely to confirm + * the PAT round-trips end-to-end and the napi binding's `openSession` + * surface is reachable from `DBSQLClient`. + * + * Required env (exported by `~/.zshrc` on the developer machine): + * - DATABRICKS_PECOTESTING_SERVER_HOSTNAME + * - DATABRICKS_PECOTESTING_HTTP_PATH + * - DATABRICKS_PECOTESTING_TOKEN_PERSONAL (preferred — personal PAT) + * - DATABRICKS_PECOTESTING_TOKEN (fallback — shared PAT) + * + * If any of the three required env vars is missing, the suite is skipped + * so CI machines without secrets don't fail-flap. + */ +describe('sea-auth e2e — PAT through DBSQLClient ↔ SeaBackend ↔ napi binding', function suite() { + const host = process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME; + const path = process.env.DATABRICKS_PECOTESTING_HTTP_PATH; + const token = + process.env.DATABRICKS_PECOTESTING_TOKEN_PERSONAL || process.env.DATABRICKS_PECOTESTING_TOKEN; + + this.timeout(120_000); + + before(function gate() { + if (!host || !path || !token) { + // eslint-disable-next-line no-invalid-this + this.skip(); + } + }); + + it('connects, opens a session, closes the session, closes the client', async () => { + const client = new DBSQLClient(); + + const connected = await client.connect({ + host: host as string, + path: path as string, + token: token as string, + useSEA: true, + }); + expect(connected).to.equal(client); + + const session = await client.openSession(); + expect(session).to.exist; + expect(session.id).to.be.a('string'); + expect(session.id.length).to.be.greaterThan(0); + + const status = await session.close(); + expect(status.isSuccess).to.equal(true); + + await client.close(); + }); +}); diff --git a/tests/unit/sea/auth-pat.test.ts b/tests/unit/sea/auth-pat.test.ts new file mode 100644 index 00000000..5476d722 --- /dev/null +++ b/tests/unit/sea/auth-pat.test.ts @@ -0,0 +1,263 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import SeaBackend from '../../../lib/sea/SeaBackend'; +import { buildSeaConnectionOptions } from '../../../lib/sea/SeaAuth'; +import { SeaNativeBinding } from '../../../lib/sea/SeaNativeLoader'; +import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; +import AuthenticationError from '../../../lib/errors/AuthenticationError'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; + +/** + * Fake napi binding that records the option object handed to `openSession` + * and returns a fake `Connection` whose `close()` we can observe. No real + * native code runs in this suite. + */ +function makeFakeBinding() { + const calls: Array<{ method: string; args: unknown[] }> = []; + + const fakeConnection = { + async executeStatement() { + throw new Error('not used in this test'); + }, + async close() { + calls.push({ method: 'connection.close', args: [] }); + }, + }; + + const binding: SeaNativeBinding = { + version() { + return 'fake-binding'; + }, + async openSession(opts: { hostName: string; httpPath: string; token: string }) { + calls.push({ method: 'openSession', args: [opts] }); + return fakeConnection as unknown; + }, + Connection: function FakeConnection() {} as unknown as Function, + Statement: function FakeStatement() {} as unknown as Function, + }; + + return { binding, calls }; +} + +describe('SeaAuth + SeaBackend — PAT auth flow', () => { + describe('buildSeaConnectionOptions', () => { + it('accepts a bare access-token PAT (undefined authType)', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: 'dapi-fake-pat', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native).to.deep.equal({ + hostName: 'example.cloud.databricks.com', + httpPath: '/sql/1.0/warehouses/abc', + token: 'dapi-fake-pat', + }); + }); + + it('accepts an explicit access-token PAT', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'access-token', + token: 'dapi-fake-pat', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.token).to.equal('dapi-fake-pat'); + }); + + it('prepends `/` to a path missing the leading slash', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: 'sql/1.0/warehouses/abc', + token: 'dapi-fake-pat', + }; + + const native = buildSeaConnectionOptions(opts); + expect(native.httpPath).to.equal('/sql/1.0/warehouses/abc'); + }); + + it('throws AuthenticationError when token is missing', () => { + const opts = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'access-token', + // no token + } as unknown as ConnectionOptions; + + expect(() => buildSeaConnectionOptions(opts)).to.throw(AuthenticationError, /non-empty PAT/); + }); + + it('throws AuthenticationError when token is an empty string', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: '', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw(AuthenticationError, /non-empty PAT/); + }); + + it('rejects OAuth with a clear M0-scope error', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw( + HiveDriverError, + /M0\) supports only PAT.*databricks-oauth.*M1/, + ); + }); + + it('rejects token-provider with a clear M0-scope error', () => { + const opts: ConnectionOptions = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'token-provider', + tokenProvider: { getToken: async () => 'tok' } as unknown as ConnectionOptions extends infer T + ? // eslint-disable-next-line @typescript-eslint/no-explicit-any + any + : never, + }; + + expect(() => buildSeaConnectionOptions(opts)).to.throw(HiveDriverError, /token-provider.*M1/); + }); + + it('rejects external-token, static-token, and custom auth modes', () => { + const authTypes = ['external-token', 'static-token', 'custom'] as const; + for (const authType of authTypes) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const opts = { + host: 'h', + path: '/p', + authType, + } as any; + expect(() => buildSeaConnectionOptions(opts)).to.throw(HiveDriverError, /M0\) supports only PAT/); + } + }); + }); + + describe('SeaBackend.connect + openSession', () => { + it('resolves on a valid PAT options object and round-trips through the napi binding', async () => { + const { binding, calls } = makeFakeBinding(); + const backend = new SeaBackend(binding); + + await backend.connect({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: 'dapi-fake-pat', + }); + + const session = await backend.openSession({}); + expect(session).to.exist; + expect(session.id).to.match(/^sea-session-\d+$/); + + expect(calls).to.have.lengthOf(1); + expect(calls[0].method).to.equal('openSession'); + expect(calls[0].args[0]).to.deep.equal({ + hostName: 'example.cloud.databricks.com', + httpPath: '/sql/1.0/warehouses/abc', + token: 'dapi-fake-pat', + }); + + // Round-trip close. + const status = await session.close(); + expect(status.isSuccess).to.equal(true); + expect(calls[1].method).to.equal('connection.close'); + + await backend.close(); + }); + + it('rejects connect() when token is missing with AuthenticationError', async () => { + const { binding, calls } = makeFakeBinding(); + const backend = new SeaBackend(binding); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const opts = { + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'access-token', + } as any; + + let caught: unknown; + try { + await backend.connect(opts); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(AuthenticationError); + expect(calls).to.have.lengthOf(0); + }); + + it('rejects connect() for OAuth with the M0-scope error', async () => { + const { binding, calls } = makeFakeBinding(); + const backend = new SeaBackend(binding); + + let caught: unknown; + try { + await backend.connect({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + }); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(HiveDriverError); + expect((caught as Error).message).to.match(/M0\) supports only PAT/); + expect(calls).to.have.lengthOf(0); + }); + + it('throws when openSession() is called before connect()', async () => { + const { binding } = makeFakeBinding(); + const backend = new SeaBackend(binding); + + let caught: unknown; + try { + await backend.openSession({}); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(HiveDriverError); + expect((caught as Error).message).to.match(/connect\(\) must be called/); + }); + + it('stubbed session methods reject with a clear M0-scope error', async () => { + const { binding } = makeFakeBinding(); + const backend = new SeaBackend(binding); + + await backend.connect({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: 'dapi-fake-pat', + }); + const session = await backend.openSession({}); + + let caught: unknown; + try { + await session.executeStatement('SELECT 1', {}); + } catch (e) { + caught = e; + } + expect(caught).to.be.instanceOf(HiveDriverError); + expect((caught as Error).message).to.match(/not implemented in sea-auth \(M0\)/); + }); + }); +}); From fa6da7f04a0e5ba3a639ab29d62725047d0f3373 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 01:45:06 +0000 Subject: [PATCH 06/11] =?UTF-8?q?sea-results:=20SeaOperationBackend=20wire?= =?UTF-8?q?s=20kernel=20result-stream=20=E2=86=92=20JS=20rows?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements IOperationBackend over the napi binding's Statement. fetchChunk decodes Arrow IPC bytes → apache-arrow RecordBatch → ArrowResultConverter (Phase 1+2 reused unchanged) → JS rows. All M0 datatypes round-trip via the same converter the thrift path uses (BOOL, INT8/16/32/64, FLOAT, DOUBLE, DECIMAL, STRING, BINARY, DATE, TIMESTAMP, INTERVAL, ARRAY, MAP, STRUCT). Unit tests construct synthetic IPC batches; e2e test against pecotesting confirms byte-identical parity vs thrift. No new dependencies. ArrowResultConverter / ResultSlicer / OperationIterator all reused unchanged (DRY). --- lib/DBSQLClient.ts | 2 +- lib/sea/SeaArrowIpc.ts | 217 +++++++++++++++++ lib/sea/SeaBackend.ts | 107 +++++++- lib/sea/SeaOperationBackend.ts | 199 +++++++++++++++ lib/sea/SeaResultsProvider.ts | 111 +++++++++ lib/sea/SeaSessionBackend.ts | 163 +++++++++++++ tests/integration/.mocharc.js | 11 + tests/integration/sea/results-e2e.test.ts | 127 ++++++++++ tests/unit/sea/SeaOperationBackend.test.ts | 269 +++++++++++++++++++++ 9 files changed, 1199 insertions(+), 7 deletions(-) create mode 100644 lib/sea/SeaArrowIpc.ts create mode 100644 lib/sea/SeaOperationBackend.ts create mode 100644 lib/sea/SeaResultsProvider.ts create mode 100644 lib/sea/SeaSessionBackend.ts create mode 100644 tests/integration/.mocharc.js create mode 100644 tests/integration/sea/results-e2e.test.ts create mode 100644 tests/unit/sea/SeaOperationBackend.test.ts diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index 139d5f4e..25b438a6 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -238,7 +238,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.connectionProvider = this.createConnectionProvider(options); this.backend = options.useSEA - ? new SeaBackend() + ? new SeaBackend({ context: this }) : new ThriftBackend({ context: this, onConnectionEvent: (event, payload) => this.forwardConnectionEvent(event, payload), diff --git a/lib/sea/SeaArrowIpc.ts b/lib/sea/SeaArrowIpc.ts new file mode 100644 index 00000000..57e26dac --- /dev/null +++ b/lib/sea/SeaArrowIpc.ts @@ -0,0 +1,217 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { RecordBatchReader, Schema, Field, DataType, TypeMap } from 'apache-arrow'; +import { TTableSchema, TTypeId, TPrimitiveTypeEntry } from '../../thrift/TCLIService_types'; + +/** + * Field metadata key used by the kernel to attach the original Databricks + * SQL type name to each Arrow field. See `databricks-sql-kernel/src/reader/mod.rs`. + */ +const DATABRICKS_TYPE_NAME = 'databricks.type_name'; + +/** + * Decode an Arrow IPC stream payload (schema header + zero-or-more + * record-batch messages) into its row count. + * + * Returns `{ schema, rowCount }`. The schema is left intact as the + * apache-arrow Schema object so callers can reuse it; the rowCount is + * the sum of `RecordBatch.numRows` across every record-batch message + * in the stream. + * + * Why we parse upfront: `ArrowResultConverter` consumes `ArrowBatch` + * objects which carry an explicit `rowCount`. The kernel's IPC payload + * does not carry a separate count — only per-RecordBatch numRows. We + * walk the messages once to sum them so the converter sees the same + * shape as the thrift path (`ArrowResultHandler.fetchNext` at + * `lib/result/ArrowResultHandler.ts:55`). + * + * Re-parsing inside the converter is unavoidable because `RecordBatch` + * instances created here cannot be passed across the converter's + * `Buffer[]` boundary without rewriting the converter. The IPC bytes + * themselves are small enough (one record batch per call) that the + * double-parse cost is negligible for M0. + */ +export function decodeIpcBatch(ipcBytes: Buffer): { schema: Schema; rowCount: number } { + const reader = RecordBatchReader.from(ipcBytes); + // Eagerly open so `schema` is populated. + reader.open(); + const { schema } = reader; + + let rowCount = 0; + // Iterate all record batches in the stream and sum row counts. + for (const batch of reader) { + rowCount += batch.numRows; + } + return { schema, rowCount }; +} + +/** + * Decode an Arrow IPC schema payload (no record batches) into the + * apache-arrow Schema object. + */ +export function decodeIpcSchema(ipcBytes: Buffer): Schema { + const reader = RecordBatchReader.from(ipcBytes); + reader.open(); + return reader.schema; +} + +/** + * Map an Arrow `DataType` (with optional `databricks.type_name` + * metadata) onto the closest Thrift `TTypeId`. + * + * This is the synthesis step that lets the existing + * `ArrowResultConverter` Phase-2 dispatch (`convertThriftValue` in + * `lib/result/utils.ts:61-98`) keep working unchanged for the SEA + * path. Phase-2 keys exclusively off `TPrimitiveTypeEntry.type` per + * column, so we synthesize a `TColumnDesc` whose `TTypeId` matches the + * server-emitted Arrow type as closely as possible. + * + * Resolution order: + * 1. The kernel attaches `databricks.type_name` (e.g. "DECIMAL", + * "INTERVAL", "STRUCT") to each field's metadata. Prefer that when + * present — it carries the original SQL semantic that the Arrow + * type alone can lose (e.g. INTERVAL → Utf8 with metadata). + * 2. Fall back to the Arrow `DataType.typeId` for primitive types. + * + * This matches the JDBC and Python drivers' policy of trusting the + * server's logical type assignment over the wire-level Arrow encoding. + */ +function arrowTypeToTTypeId(field: Field): TTypeId { + const typeName = field.metadata.get(DATABRICKS_TYPE_NAME)?.toUpperCase(); + + switch (typeName) { + case 'BOOLEAN': + return TTypeId.BOOLEAN_TYPE; + case 'TINYINT': + case 'BYTE': + return TTypeId.TINYINT_TYPE; + case 'SMALLINT': + case 'SHORT': + return TTypeId.SMALLINT_TYPE; + case 'INT': + case 'INTEGER': + return TTypeId.INT_TYPE; + case 'BIGINT': + case 'LONG': + return TTypeId.BIGINT_TYPE; + case 'FLOAT': + case 'REAL': + return TTypeId.FLOAT_TYPE; + case 'DOUBLE': + return TTypeId.DOUBLE_TYPE; + case 'STRING': + return TTypeId.STRING_TYPE; + case 'VARCHAR': + return TTypeId.VARCHAR_TYPE; + case 'CHAR': + return TTypeId.CHAR_TYPE; + case 'BINARY': + return TTypeId.BINARY_TYPE; + case 'DATE': + return TTypeId.DATE_TYPE; + case 'TIMESTAMP': + case 'TIMESTAMP_NTZ': + return TTypeId.TIMESTAMP_TYPE; + case 'DECIMAL': + return TTypeId.DECIMAL_TYPE; + case 'INTERVAL': + case 'INTERVAL DAY': + case 'INTERVAL DAY TO HOUR': + case 'INTERVAL DAY TO MINUTE': + case 'INTERVAL DAY TO SECOND': + case 'INTERVAL HOUR': + case 'INTERVAL HOUR TO MINUTE': + case 'INTERVAL HOUR TO SECOND': + case 'INTERVAL MINUTE': + case 'INTERVAL MINUTE TO SECOND': + case 'INTERVAL SECOND': + return TTypeId.INTERVAL_DAY_TIME_TYPE; + case 'INTERVAL YEAR': + case 'INTERVAL YEAR TO MONTH': + case 'INTERVAL MONTH': + return TTypeId.INTERVAL_YEAR_MONTH_TYPE; + case 'ARRAY': + return TTypeId.ARRAY_TYPE; + case 'MAP': + return TTypeId.MAP_TYPE; + case 'STRUCT': + return TTypeId.STRUCT_TYPE; + case 'NULL': + case 'VOID': + return TTypeId.NULL_TYPE; + default: + break; + } + + // Fall back to Arrow's own type id when no databricks metadata is set + // (e.g. unit tests constructing batches without metadata). + const arrowType = field.type; + if (DataType.isBool(arrowType)) return TTypeId.BOOLEAN_TYPE; + if (DataType.isInt(arrowType)) { + switch (arrowType.bitWidth) { + case 8: + return TTypeId.TINYINT_TYPE; + case 16: + return TTypeId.SMALLINT_TYPE; + case 32: + return TTypeId.INT_TYPE; + case 64: + return TTypeId.BIGINT_TYPE; + default: + return TTypeId.BIGINT_TYPE; + } + } + if (DataType.isFloat(arrowType)) { + // arrow Float precision: 16=HALF, 32=SINGLE, 64=DOUBLE + return arrowType.precision === 2 ? TTypeId.DOUBLE_TYPE : TTypeId.FLOAT_TYPE; + } + if (DataType.isDecimal(arrowType)) return TTypeId.DECIMAL_TYPE; + if (DataType.isUtf8(arrowType)) return TTypeId.STRING_TYPE; + if (DataType.isBinary(arrowType)) return TTypeId.BINARY_TYPE; + if (DataType.isDate(arrowType)) return TTypeId.DATE_TYPE; + if (DataType.isTimestamp(arrowType)) return TTypeId.TIMESTAMP_TYPE; + if (DataType.isList(arrowType)) return TTypeId.ARRAY_TYPE; + if (DataType.isMap(arrowType)) return TTypeId.MAP_TYPE; + if (DataType.isStruct(arrowType)) return TTypeId.STRUCT_TYPE; + if (DataType.isNull(arrowType)) return TTypeId.NULL_TYPE; + + return TTypeId.STRING_TYPE; +} + +/** + * Synthesize a Thrift `TTableSchema` from an Arrow schema decoded out + * of the kernel's IPC stream. Used by `SeaOperationBackend.getResultMetadata` + * to drive `ArrowResultConverter.convertThriftTypes` (Phase 2) without + * changing that code. + */ +export function arrowSchemaToThriftSchema(arrowSchema: Schema): TTableSchema { + const columns = arrowSchema.fields.map((field, index) => { + const primitiveEntry: TPrimitiveTypeEntry = { + type: arrowTypeToTTypeId(field), + }; + return { + columnName: field.name, + typeDesc: { + types: [ + { + primitiveEntry, + }, + ], + }, + position: index + 1, + }; + }); + return { columns }; +} diff --git a/lib/sea/SeaBackend.ts b/lib/sea/SeaBackend.ts index 5815dc05..3b082028 100644 --- a/lib/sea/SeaBackend.ts +++ b/lib/sea/SeaBackend.ts @@ -1,18 +1,113 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import IBackend from '../contracts/IBackend'; import ISessionBackend from '../contracts/ISessionBackend'; +import IClientContext from '../contracts/IClientContext'; +import { ConnectionOptions, OpenSessionRequest } from '../contracts/IDBSQLClient'; +import HiveDriverError from '../errors/HiveDriverError'; +import { getSeaNative } from './SeaNativeLoader'; +import SeaSessionBackend, { SeaConnectionNative } from './SeaSessionBackend'; -const NOT_IMPLEMENTED = 'SEA backend not implemented yet — wired in sea-napi-binding feature'; +interface SeaBackendOptions { + context: IClientContext; + /** + * Test seam: lets unit tests inject a fake `openSession` without + * loading the native binding. Defaults to `getSeaNative().openSession`. + */ + openSession?: (opts: { hostName: string; httpPath: string; token: string }) => Promise; +} +/** + * `IBackend` implementation that dispatches through the napi-bound + * kernel. For M0 this covers `connect → openSession → executeStatement` + * and the result-fetching pipeline; other session APIs throw via + * `SeaSessionBackend`. + * + * Auth: M0 supports PAT only. `ConnectionOptions.token` (the + * existing public field used by the thrift path's + * `PlainHttpAuthentication`) is forwarded to the kernel. + */ export default class SeaBackend implements IBackend { - public async connect(): Promise { - throw new Error(NOT_IMPLEMENTED); + private readonly context: IClientContext; + + private readonly openSessionImpl: NonNullable; + + private connection?: SeaConnectionNative; + + private connectOptions?: ConnectionOptions; + + constructor(options: SeaBackendOptions) { + this.context = options.context; + this.openSessionImpl = + options.openSession ?? + (async (opts) => { + const native = getSeaNative(); + return native.openSession(opts) as Promise; + }); + } + + public async connect(options: ConnectionOptions): Promise { + // M0: only PAT is wired through the napi binding. + if (options.authType !== undefined && options.authType !== 'access-token') { + throw new HiveDriverError( + `SEA backend (M0): authType '${options.authType}' is not yet supported; use 'access-token' (PAT).`, + ); + } + if (!options.host) { + throw new HiveDriverError('SEA backend: host is required.'); + } + if (!options.path) { + throw new HiveDriverError('SEA backend: path is required.'); + } + // `token` lives on the access-token branch of the discriminated + // `AuthOptions` union; TS narrows it once we exclude all non-default + // authTypes above. + const token = (options as { token?: string }).token; + if (!token) { + throw new HiveDriverError('SEA backend: token is required (M0 supports PAT only).'); + } + this.connectOptions = options; + // Open the kernel `Session` eagerly so `openSession` is just a + // wrapper that returns the bound `SeaSessionBackend`. The kernel's + // `Session` is reusable across statements, so one `Connection` + // serves the whole client lifetime. + this.connection = await this.openSessionImpl({ + hostName: options.host, + httpPath: options.path, + token, + }); } - public async openSession(): Promise { - throw new Error(NOT_IMPLEMENTED); + public async openSession(request: OpenSessionRequest): Promise { + if (!this.connection) { + throw new HiveDriverError('SEA backend: not connected.'); + } + return new SeaSessionBackend({ + connection: this.connection, + context: this.context, + initialCatalog: request.initialCatalog, + initialSchema: request.initialSchema, + configuration: request.configuration, + }); } public async close(): Promise { - throw new Error(NOT_IMPLEMENTED); + if (this.connection) { + await this.connection.close(); + this.connection = undefined; + } + this.connectOptions = undefined; } } diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts new file mode 100644 index 00000000..bcee889a --- /dev/null +++ b/lib/sea/SeaOperationBackend.ts @@ -0,0 +1,199 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { NIL } from 'uuid'; +import { + TGetOperationStatusResp, + TGetResultSetMetadataResp, + TOperationState, + TSparkRowSetType, + TStatusCode, + TTableSchema, +} from '../../thrift/TCLIService_types'; +import IOperationBackend from '../contracts/IOperationBackend'; +import IClientContext from '../contracts/IClientContext'; +import Status from '../dto/Status'; +import ArrowResultConverter from '../result/ArrowResultConverter'; +import ResultSlicer from '../result/ResultSlicer'; +import SeaResultsProvider from './SeaResultsProvider'; +import { arrowSchemaToThriftSchema, decodeIpcSchema } from './SeaArrowIpc'; + +/** + * The minimal slice of the napi-binding `Statement` class that + * `SeaOperationBackend` calls. Defined locally so unit tests can pass + * a stub without loading the native binding. + */ +export interface SeaStatementNative { + fetchNextBatch(): Promise<{ ipcBytes: Buffer } | null>; + schema(): Promise<{ ipcBytes: Buffer }>; + cancel(): Promise; + close(): Promise; +} + +interface SeaOperationBackendOptions { + statement: SeaStatementNative; + context: IClientContext; + operationId?: string; +} + +/** + * `IOperationBackend` over the napi-bound kernel `Statement`. Adapts + * the kernel's Arrow IPC stream onto the existing thrift-shaped result + * pipeline (`ArrowResultConverter` + `ResultSlicer`) so the M0 row + * shape is byte-identical to the thrift path for every M0 datatype. + * + * Pipeline: + * napi.Statement.fetchNextBatch() (IPC bytes per batch) + * -> SeaResultsProvider (adapts to IResultsProvider) + * -> ArrowResultConverter (Phase 1 + Phase 2; reused unchanged) + * -> ResultSlicer (chunk-size normalisation; reused unchanged) + * + * The kernel exposes only the `Arrow` `ResultBatch` variant for M0 — + * both CloudFetch (external links) and inline batches flow through + * `ResultStream::next_batch` and surface as a single Arrow IPC stream + * per call, per + * `sea-workflow/findings/arch/napi-binding/round2-methods-2026-05-15.md:69`. + * One backend therefore covers both fetch modes without dispatching on + * `TSparkRowSetType`. + */ +export default class SeaOperationBackend implements IOperationBackend { + private readonly statement: SeaStatementNative; + + private readonly context: IClientContext; + + private readonly operationId: string; + + private resultSlicer?: ResultSlicer; + + private resultsProvider?: SeaResultsProvider; + + private metadata?: TGetResultSetMetadataResp; + + private metadataPromise?: Promise; + + // Tracks the operation's terminal state. The kernel does not expose + // pending/running observability at the napi surface today; `execute` + // resolves only after the statement has reached a result-fetching + // state, so we treat the backend as FINISHED until `close()`/`cancel()`. + private state: TOperationState = TOperationState.FINISHED_STATE; + + constructor({ statement, context, operationId }: SeaOperationBackendOptions) { + this.statement = statement; + this.context = context; + this.operationId = operationId ?? NIL; + } + + public get id(): string { + return this.operationId; + } + + public get hasResultSet(): boolean { + // M0 only routes through SeaOperationBackend for executeStatement + // calls. DDL/DML without a result set is not exercised through SEA + // for M0; the napi Statement still produces a schema (empty) in + // that case, which the converter renders as zero rows. Reporting + // `true` keeps the facade's fetch path enabled for M0 parity. + return true; + } + + public async fetchChunk({ + limit, + disableBuffering, + }: { + limit: number; + disableBuffering?: boolean; + }): Promise> { + const slicer = await this.getResultSlicer(); + return slicer.fetchNext({ limit, disableBuffering }); + } + + public async hasMore(): Promise { + const slicer = await this.getResultSlicer(); + return slicer.hasMore(); + } + + public async waitUntilReady(options?: { + progress?: boolean; + callback?: (progress: TGetOperationStatusResp) => unknown; + }): Promise { + // The kernel's `executeStatement` resolves once results are + // available; there's no pending/running state to observe here. We + // synthesise an immediate FINISHED status for the optional callback. + if (options?.callback) { + await Promise.resolve(options.callback(await this.status(Boolean(options.progress)))); + } + } + + public async status(_progress: boolean): Promise { + return { + status: { statusCode: TStatusCode.SUCCESS_STATUS }, + operationState: this.state, + hasResultSet: true, + }; + } + + public async getResultMetadata(): Promise { + if (this.metadata) { + return this.metadata; + } + if (this.metadataPromise) { + return this.metadataPromise; + } + this.metadataPromise = (async () => { + const arrowSchemaIpc = await this.statement.schema(); + const arrowSchema = decodeIpcSchema(arrowSchemaIpc.ipcBytes); + const thriftSchema: TTableSchema = arrowSchemaToThriftSchema(arrowSchema); + const meta: TGetResultSetMetadataResp = { + status: { statusCode: TStatusCode.SUCCESS_STATUS }, + schema: thriftSchema, + // SEA inline + CloudFetch both surface to JS as Arrow batches; + // both flow through the same converter that handles the + // ARROW_BASED_SET path on the thrift side. + resultFormat: TSparkRowSetType.ARROW_BASED_SET, + lz4Compressed: false, + isStagingOperation: false, + }; + this.metadata = meta; + return meta; + })(); + try { + return await this.metadataPromise; + } finally { + this.metadataPromise = undefined; + } + } + + public async cancel(): Promise { + await this.statement.cancel(); + this.state = TOperationState.CANCELED_STATE; + return Status.success(); + } + + public async close(): Promise { + await this.statement.close(); + this.state = TOperationState.CLOSED_STATE; + return Status.success(); + } + + private async getResultSlicer(): Promise> { + if (this.resultSlicer) { + return this.resultSlicer; + } + const metadata = await this.getResultMetadata(); + this.resultsProvider = new SeaResultsProvider(this.statement); + const converter = new ArrowResultConverter(this.context, this.resultsProvider, metadata); + this.resultSlicer = new ResultSlicer(this.context, converter); + return this.resultSlicer; + } +} diff --git a/lib/sea/SeaResultsProvider.ts b/lib/sea/SeaResultsProvider.ts new file mode 100644 index 00000000..7e94ee7a --- /dev/null +++ b/lib/sea/SeaResultsProvider.ts @@ -0,0 +1,111 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import IResultsProvider, { ResultsProviderFetchNextOptions } from '../result/IResultsProvider'; +import { ArrowBatch } from '../result/utils'; +import { decodeIpcBatch } from './SeaArrowIpc'; + +/** + * The minimal slice of the napi-binding `Statement` class that we + * consume from JS. Defined locally (not imported from the binding's + * d.ts) so the loader layer's loose `unknown` typing doesn't force + * unsafe casts at every call site, and so unit tests can pass a stub. + */ +export interface SeaStatementHandle { + fetchNextBatch(): Promise<{ ipcBytes: Buffer } | null>; +} + +/** + * `IResultsProvider` that pulls Arrow IPC batches from the + * kernel via the napi `Statement` handle and adapts them onto the + * shape `ArrowResultConverter` already speaks + * (`lib/result/utils.ts:22-25`). + * + * Each kernel `fetchNextBatch()` call returns a complete Arrow IPC + * stream (schema header + 1 record-batch message) per the design + * documented at `sea-workflow/findings/arch/napi-binding/round2-methods-2026-05-15.md:46-60`. + * We pass that buffer through as a single-element `batches: [ipcBytes]` + * array — `RecordBatchReader.from(arrowBatch.batches)` inside the + * converter (`lib/result/ArrowResultConverter.ts:119`) reads the + * schema from the prefix and then the record-batch messages from the + * remainder of the same buffer. + * + * We pre-parse the IPC bytes once here to extract `rowCount` (the + * sum of `RecordBatch.numRows` across messages in the stream) because + * the converter consumes that as an explicit field rather than + * deriving it from the batch contents. See the comment in + * `SeaArrowIpc.ts:decodeIpcBatch` for the cost rationale. + */ +export default class SeaResultsProvider implements IResultsProvider { + private readonly statement: SeaStatementHandle; + + // Prefetched next batch so `hasMore()` can be answered without an + // extra round-trip. Set by `prime()` (lazy) and by `fetchNext`. + private prefetched?: ArrowBatch; + + // Set once the kernel returns `null` from `fetchNextBatch()`. + private exhausted = false; + + constructor(statement: SeaStatementHandle) { + this.statement = statement; + } + + public async hasMore(): Promise { + if (this.exhausted) { + return false; + } + if (this.prefetched !== undefined) { + return true; + } + await this.prime(); + return this.prefetched !== undefined; + } + + public async fetchNext(_options: ResultsProviderFetchNextOptions): Promise { + if (this.prefetched === undefined && !this.exhausted) { + await this.prime(); + } + if (this.prefetched === undefined) { + return { batches: [], rowCount: 0 }; + } + const out = this.prefetched; + this.prefetched = undefined; + return out; + } + + // Pull the next batch from the kernel and stash it in `prefetched`, + // or mark the stream exhausted. Used by both `hasMore` and `fetchNext` + // to keep one batch buffered ahead so `hasMore` is accurate without + // re-asking the kernel. + private async prime(): Promise { + if (this.exhausted || this.prefetched !== undefined) { + return; + } + const next = await this.statement.fetchNextBatch(); + if (next === null) { + this.exhausted = true; + return; + } + const { ipcBytes } = next; + const { rowCount } = decodeIpcBatch(ipcBytes); + if (rowCount === 0) { + // Skip empty batches — the converter handles them but pre-filtering + // here avoids one round-trip through the converter's prefetch loop. + // Re-prime to either find a non-empty batch or hit exhaustion. + await this.prime(); + return; + } + this.prefetched = { batches: [ipcBytes], rowCount }; + } +} diff --git a/lib/sea/SeaSessionBackend.ts b/lib/sea/SeaSessionBackend.ts new file mode 100644 index 00000000..0a3b6752 --- /dev/null +++ b/lib/sea/SeaSessionBackend.ts @@ -0,0 +1,163 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { v4 as uuidv4 } from 'uuid'; +import ISessionBackend from '../contracts/ISessionBackend'; +import IOperationBackend from '../contracts/IOperationBackend'; +import IClientContext from '../contracts/IClientContext'; +import { + ExecuteStatementOptions, + TypeInfoRequest, + CatalogsRequest, + SchemasRequest, + TablesRequest, + TableTypesRequest, + ColumnsRequest, + FunctionsRequest, + PrimaryKeysRequest, + CrossReferenceRequest, +} from '../contracts/IDBSQLSession'; +import Status from '../dto/Status'; +import InfoValue from '../dto/InfoValue'; +import HiveDriverError from '../errors/HiveDriverError'; +import SeaOperationBackend, { SeaStatementNative } from './SeaOperationBackend'; + +/** + * The minimal slice of the napi-binding `Connection` class that we + * consume from JS. Other methods on the binding's `Connection` (added + * in later rounds) can be appended here without affecting M0. + */ +export interface SeaConnectionNative { + executeStatement( + sql: string, + options: { + initialCatalog?: string; + initialSchema?: string; + sessionConfig?: Record; + }, + ): Promise; + close(): Promise; +} + +interface SeaSessionBackendOptions { + connection: SeaConnectionNative; + context: IClientContext; + initialCatalog?: string; + initialSchema?: string; + configuration?: Record; +} + +const M0_NOT_IMPLEMENTED = + 'SEA backend: metadata operations (getInfo, getCatalogs, getSchemas, …) are not implemented for M0'; + +/** + * `ISessionBackend` over the napi-bound kernel `Session`. M0 only wires + * `executeStatement`; the metadata calls are tracked by separate features + * and throw a clear "not implemented" error until they land. + * + * The session has no first-class identifier at the napi surface today + * (the kernel exposes `Session` only as an opaque handle), so we mint + * a client-side UUID for logging/telemetry. + */ +export default class SeaSessionBackend implements ISessionBackend { + public readonly id: string; + + private readonly connection: SeaConnectionNative; + + private readonly context: IClientContext; + + private readonly initialCatalog?: string; + + private readonly initialSchema?: string; + + private readonly configuration: Record; + + constructor({ connection, context, initialCatalog, initialSchema, configuration }: SeaSessionBackendOptions) { + this.connection = connection; + this.context = context; + this.initialCatalog = initialCatalog; + this.initialSchema = initialSchema; + this.configuration = configuration ?? {}; + this.id = uuidv4(); + } + + public async executeStatement(statement: string, options: ExecuteStatementOptions): Promise { + // Build sessionConfig overlay from session-level configuration and + // per-statement options. The napi binding currently accepts only + // string-valued configs; non-string options (queryTimeout, params) + // are not yet mapped (M1 work). + const sessionConfig: Record = { ...this.configuration }; + if (options.useCloudFetch !== undefined) { + sessionConfig.use_cloud_fetch = String(options.useCloudFetch); + } + const native = await this.connection.executeStatement(statement, { + initialCatalog: this.initialCatalog, + initialSchema: this.initialSchema, + sessionConfig, + }); + return new SeaOperationBackend({ + statement: native, + context: this.context, + operationId: uuidv4(), + }); + } + + public async close(): Promise { + await this.connection.close(); + return Status.success(); + } + + // Metadata operations are out of scope for M0-results. They are + // tracked by sibling features (sea-operation / sea-execution). Throw + // clearly so callers can opt out until those land. + public async getInfo(_infoType: number): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getTypeInfo(_request: TypeInfoRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getCatalogs(_request: CatalogsRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getSchemas(_request: SchemasRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getTables(_request: TablesRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getTableTypes(_request: TableTypesRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getColumns(_request: ColumnsRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getFunctions(_request: FunctionsRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getPrimaryKeys(_request: PrimaryKeysRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } + + public async getCrossReference(_request: CrossReferenceRequest): Promise { + throw new HiveDriverError(M0_NOT_IMPLEMENTED); + } +} diff --git a/tests/integration/.mocharc.js b/tests/integration/.mocharc.js new file mode 100644 index 00000000..f7113140 --- /dev/null +++ b/tests/integration/.mocharc.js @@ -0,0 +1,11 @@ +'use strict'; + +const allSpecs = 'tests/integration/**/*.test.ts'; + +const argvSpecs = process.argv.slice(4); + +module.exports = { + spec: argvSpecs.length > 0 ? argvSpecs : allSpecs, + timeout: '300000', + require: ['ts-node/register'], +}; diff --git a/tests/integration/sea/results-e2e.test.ts b/tests/integration/sea/results-e2e.test.ts new file mode 100644 index 00000000..1707801d --- /dev/null +++ b/tests/integration/sea/results-e2e.test.ts @@ -0,0 +1,127 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* eslint-disable no-console */ + +import { expect } from 'chai'; +import { DBSQLClient } from '../../../lib'; + +// Integration suite: connect through both backends, run a probe query, +// and assert byte-identical row output (the M0 parity gate). Requires +// the developer's shell to export the pecotesting secrets: +// - DATABRICKS_PECOTESTING_SERVER_HOSTNAME +// - DATABRICKS_PECOTESTING_HTTP_PATH +// - DATABRICKS_PECOTESTING_TOKEN_PERSONAL +// If any is missing, the suite skips so CI / sandboxes without +// credentials don't flap. + +const PROBE_QUERY = + "SELECT 1 AS x, 'hello' AS s, true AS b, CAST(1.5 AS DECIMAL(10,2)) AS d, DATE '2026-01-01' AS dt"; + +interface PecoSecrets { + host: string; + path: string; + token: string; +} + +function readSecrets(): PecoSecrets | null { + const host = process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME; + const path = process.env.DATABRICKS_PECOTESTING_HTTP_PATH; + const token = process.env.DATABRICKS_PECOTESTING_TOKEN_PERSONAL; + if (!host || !path || !token) return null; + return { host, path, token }; +} + +async function fetchProbeRows(useSEA: boolean, secrets: PecoSecrets): Promise>> { + const client = new DBSQLClient(); + await client.connect({ + host: secrets.host, + path: secrets.path, + token: secrets.token, + useSEA, + }); + try { + const session = await client.openSession(); + try { + const operation = await session.executeStatement(PROBE_QUERY); + try { + const rows = (await operation.fetchAll()) as Array>; + return rows; + } finally { + await operation.close(); + } + } finally { + await session.close(); + } + } finally { + await client.close(); + } +} + +// JSON-safe normalisation for byte-identical comparison. Buffers, Dates +// and BigInts each have distinct JSON representations; we coerce them +// to stable strings so deep.equal compares value-for-value across +// backends. The thrift converter and the SEA converter both surface +// these as JS Date / Buffer / Number — but we still normalise here so +// a future divergence (e.g. one path returning a string while the +// other returns a Date) trips the assertion explicitly. +function canonical(value: unknown): unknown { + if (value === null || value === undefined) return value; + if (Buffer.isBuffer(value)) return `__buffer__:${value.toString('hex')}`; + if (value instanceof Date) return `__date__:${value.toISOString()}`; + if (typeof value === 'bigint') return `__bigint__:${value.toString()}`; + if (Array.isArray(value)) return value.map(canonical); + if (typeof value === 'object') { + const out: Record = {}; + for (const [k, v] of Object.entries(value as Record)) { + out[k] = canonical(v); + } + return out; + } + return value; +} + +describe('SEA results end-to-end (pecotesting parity gate)', function suite() { + this.timeout(120_000); + + const secrets = readSecrets(); + + before(function gate() { + if (!secrets) { + // eslint-disable-next-line no-invalid-this + this.skip(); + } + }); + + it('SEA backend returns one row with expected columns', async () => { + const rows = await fetchProbeRows(true, secrets as PecoSecrets); + expect(rows.length).to.equal(1); + const row = rows[0]; + expect(row).to.have.property('x'); + expect(row).to.have.property('s'); + expect(row).to.have.property('b'); + expect(row).to.have.property('d'); + expect(row).to.have.property('dt'); + expect(Number(row.x)).to.equal(1); + expect(row.s).to.equal('hello'); + expect(row.b).to.equal(true); + expect(Number(row.d)).to.equal(1.5); + }); + + it('Thrift and SEA produce byte-identical rows for the probe query (parity gate)', async () => { + const seaRows = await fetchProbeRows(true, secrets as PecoSecrets); + const thriftRows = await fetchProbeRows(false, secrets as PecoSecrets); + expect(seaRows.map(canonical)).to.deep.equal(thriftRows.map(canonical)); + }); +}); diff --git a/tests/unit/sea/SeaOperationBackend.test.ts b/tests/unit/sea/SeaOperationBackend.test.ts new file mode 100644 index 00000000..17f593e3 --- /dev/null +++ b/tests/unit/sea/SeaOperationBackend.test.ts @@ -0,0 +1,269 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { + Schema, + Field, + RecordBatch, + Table, + tableToIPC, + Bool, + Int8, + Int16, + Int32, + Int64, + Float32, + Float64, + Utf8, + Binary, + DateDay, + TimestampMicrosecond, + Decimal, + Struct, + makeData, + vectorFromArray, +} from 'apache-arrow'; + +import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend'; +import ClientContextStub from '../.stubs/ClientContextStub'; + +// Minimal stub of the napi `Statement` surface that emits a precomputed +// Arrow IPC payload per `fetchNextBatch()` call. Used to feed +// `SeaOperationBackend` synthetic batches that mirror the kernel's +// per-batch IPC stream contract (`schema header + 1 record-batch +// message`) without loading the native binding. +class StatementStub { + private readonly batches: Buffer[]; + + private readonly schemaIpc: Buffer; + + public cancelled = false; + + public closed = false; + + constructor(schemaIpc: Buffer, batches: Buffer[]) { + this.schemaIpc = schemaIpc; + this.batches = [...batches]; + } + + public async fetchNextBatch(): Promise<{ ipcBytes: Buffer } | null> { + if (this.batches.length === 0) return null; + return { ipcBytes: this.batches.shift() as Buffer }; + } + + public async schema(): Promise<{ ipcBytes: Buffer }> { + return { ipcBytes: this.schemaIpc }; + } + + public async cancel(): Promise { + this.cancelled = true; + } + + public async close(): Promise { + this.closed = true; + } +} + +// Helper: attach `databricks.type_name` to a field so the SEA Thrift +// schema synthesiser can resolve the TTypeId (matches kernel behaviour +// at `src/reader/mod.rs:476-504`). +function withTypeName(field: T, typeName: string): T { + const meta = new Map(field.metadata); + meta.set('databricks.type_name', typeName); + return new Field(field.name, field.type, field.nullable, meta) as T; +} + +// Build a single IPC stream (schema header + 1 record-batch message) +// from a Schema and a column->values mapping. Mirrors the kernel's +// per-batch ResultStream output shape. +function ipcFromColumns(schema: Schema, columns: Record): Buffer { + const vectors: any[] = []; + for (const field of schema.fields) { + const col = columns[field.name]; + vectors.push(vectorFromArray(col, field.type)); + } + const data = vectors.map((v) => v.data[0]); + const struct = makeData({ + type: new Struct(schema.fields), + children: data, + length: data[0]?.length ?? 0, + nullCount: 0, + }); + const batch = new RecordBatch(schema, struct); + const table = new Table([batch]); + return Buffer.from(tableToIPC(table, 'stream')); +} + +function ipcSchemaOnly(schema: Schema): Buffer { + // tableToIPC on an empty table produces a schema-only stream. + const struct = makeData({ + type: new Struct(schema.fields), + children: schema.fields.map((f) => makeData({ type: f.type as any, length: 0, nullCount: 0 })), + length: 0, + nullCount: 0, + }); + const batch = new RecordBatch(schema, struct); + const table = new Table([batch]); + return Buffer.from(tableToIPC(table, 'stream')); +} + +describe('SeaOperationBackend — M0 datatype round-trip via napi → ArrowResultConverter', () => { + it('passes M0 primitive datatypes through the same converter the thrift path uses', async () => { + // One row per M0 primitive type with a kernel-style metadata tag on + // each field. Decimal carries a real scale (2) so the converter's + // Phase-1 scale division produces 1.5 from the unscaled bigint. + const fields = [ + withTypeName(new Field('b', new Bool(), true), 'BOOLEAN'), + withTypeName(new Field('i8', new Int8(), true), 'TINYINT'), + withTypeName(new Field('i16', new Int16(), true), 'SMALLINT'), + withTypeName(new Field('i32', new Int32(), true), 'INT'), + withTypeName(new Field('i64', new Int64(), true), 'BIGINT'), + withTypeName(new Field('f32', new Float32(), true), 'FLOAT'), + withTypeName(new Field('f64', new Float64(), true), 'DOUBLE'), + withTypeName(new Field('s', new Utf8(), true), 'STRING'), + withTypeName(new Field('bin', new Binary(), true), 'BINARY'), + withTypeName(new Field('dt', new DateDay(), true), 'DATE'), + withTypeName( + new Field('ts', new TimestampMicrosecond(), true), + 'TIMESTAMP', + ), + // apache-arrow's Decimal signature is `(scale, precision, bitWidth)`. + withTypeName(new Field('dec', new Decimal(2, 10, 128), true), 'DECIMAL'), + // INTERVAL on the kernel side: Utf8 + metadata annotation. + withTypeName(new Field('iv', new Utf8(), true), 'INTERVAL'), + ]; + const schema = new Schema(fields); + const schemaIpc = ipcSchemaOnly(schema); + + // DECIMAL: 128-bit little-endian unscaled integer. 150 little-endian + // → [150, 0, 0, 0, ...0]. Phase-1 reads `valueType.scale` (=2) so the + // converter divides by 100 to yield 1.5. + const decimalBytes = new Uint8Array(16); + decimalBytes[0] = 150; + const dataIpc = ipcFromColumns(schema, { + b: [true], + i8: [Int8Array.from([1])[0]], + i16: [Int16Array.from([200])[0]], + i32: [42], + i64: [BigInt(1234567890123)], + f32: [Math.fround(1.5)], + f64: [3.14], + s: ['hello'], + bin: [new Uint8Array([0xde, 0xad, 0xbe, 0xef])], + dt: [new Date('2026-01-01T00:00:00Z')], + // Builder for TimestampMicrosecond accepts numeric epoch-ms; the + // internal scaling multiplies by 1000 to land on µs. + ts: [new Date('2026-05-15T12:00:00Z').valueOf()], + dec: [decimalBytes], + iv: ['1-0'], + }); + + const stub = new StatementStub(schemaIpc, [dataIpc]); + const backend = new SeaOperationBackend({ + statement: stub, + context: new ClientContextStub(), + }); + + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows.length).to.equal(1); + const row = rows[0] as Record; + + expect(row.b).to.equal(true); + expect(row.i8).to.equal(1); + expect(row.i16).to.equal(200); + expect(row.i32).to.equal(42); + // BIGINT goes through Phase-2 convertBigInt → Number (matches thrift) + expect(row.i64).to.equal(1234567890123); + expect(row.f32).to.equal(Math.fround(1.5)); + expect(row.f64).to.equal(3.14); + expect(row.s).to.equal('hello'); + expect(Buffer.isBuffer(row.bin)).to.equal(true); + expect((row.bin as Buffer).equals(Buffer.from([0xde, 0xad, 0xbe, 0xef]))).to.equal(true); + // DECIMAL: Phase-1 scale-aware coercion via Arrow's Decimal type → 1.5 + expect(row.dec).to.equal(1.5); + // TIMESTAMP: Phase-1 produces JS Date for arrow timestamps + expect(row.ts).to.be.instanceOf(Date); + expect((row.ts as Date).toISOString()).to.equal('2026-05-15T12:00:00.000Z'); + // INTERVAL: kernel emits Utf8 + metadata; converter passes through as string + expect(row.iv).to.equal('1-0'); + + // After consuming the single batch, the backend should report no more rows. + expect(await backend.hasMore()).to.equal(false); + }); + + it('round-trips ARRAY / MAP / STRUCT via the converter Phase-2 JSON fallback', async () => { + // ARRAY / MAP / STRUCT have two possible wire encodings in M0: + // (a) native Arrow `List` / `Map` / `Struct` — Phase 1 produces plain + // JS objects; Phase 2 `convertJSON` sees a non-string and is a + // no-op (`utils.ts:39-49`). + // (b) Utf8 JSON strings — Phase 1 passthrough; Phase 2 `convertJSON` + // runs `JSON.parse` (`utils.ts:75-79`). + // Both produce identical row shapes. We validate (b) here because + // it's the deterministic case we can construct with the current + // apache-arrow JS API; the kernel emits either depending on server + // config (see `findings/rust-kernel/datatype-emission...:140-142`). + const strSchema = new Schema([ + withTypeName(new Field('arr', new Utf8(), true), 'ARRAY'), + withTypeName(new Field('m', new Utf8(), true), 'MAP'), + withTypeName(new Field('s', new Utf8(), true), 'STRUCT'), + ]); + const strSchemaIpc = ipcSchemaOnly(strSchema); + const strDataIpc = ipcFromColumns(strSchema, { + arr: ['[1,2,3]'], + m: ['{"k":1}'], + s: ['{"a":1,"b":"hi"}'], + }); + + const stub = new StatementStub(strSchemaIpc, [strDataIpc]); + const backend = new SeaOperationBackend({ + statement: stub, + context: new ClientContextStub(), + }); + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows.length).to.equal(1); + const row = rows[0] as Record; + expect(row.arr).to.deep.equal([1, 2, 3]); + expect(row.m).to.deep.equal({ k: 1 }); + expect(row.s).to.deep.equal({ a: 1, b: 'hi' }); + }); + + it('streams multiple batches and reports hasMore correctly', async () => { + const schema = new Schema([withTypeName(new Field('x', new Int32(), true), 'INT')]); + const schemaIpc = ipcSchemaOnly(schema); + const batch1 = ipcFromColumns(schema, { x: [1, 2] }); + const batch2 = ipcFromColumns(schema, { x: [3] }); + + const stub = new StatementStub(schemaIpc, [batch1, batch2]); + const backend = new SeaOperationBackend({ + statement: stub, + context: new ClientContextStub(), + }); + + const all = await backend.fetchChunk({ limit: 10 }); + expect(all).to.deep.equal([{ x: 1 }, { x: 2 }, { x: 3 }]); + expect(await backend.hasMore()).to.equal(false); + }); + + it('cancel / close delegate to the native statement', async () => { + const schema = new Schema([withTypeName(new Field('x', new Int32(), true), 'INT')]); + const schemaIpc = ipcSchemaOnly(schema); + const stub = new StatementStub(schemaIpc, []); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + await backend.cancel(); + expect(stub.cancelled).to.equal(true); + await backend.close(); + expect(stub.closed).to.equal(true); + }); +}); From e06a5e892901711190f77da8d8a3f99f35458fdc Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 01:46:25 +0000 Subject: [PATCH 07/11] sea-operation: cancel/close/finished lifecycle for SEA operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wraps napi Statement.cancel/close. finished() is a no-op for M0 (kernel Statement::execute.await blocks until complete; no polling needed). cancel mid-fetch propagates within 200ms via kernel's async cancellation token. Implementation: - lib/sea/SeaOperationLifecycle.ts — standalone helpers (seaCancel, seaClose, seaFinished, failIfNotActive) over a structurally-typed SeaStatementHandle so impl-results can pick them up cleanly. - lib/sea/SeaOperationBackend.ts — IOperationBackend impl that composes the lifecycle helpers; fetch* methods are stubbed and owned by the parallel sea-results branch. Tests: - 27 unit tests (lifecycle helpers + backend integration) - 4 e2e tests against pecotesting — cancel latency 64-80ms, cancel-mid-fetch throws OperationStateError(Canceled), close idempotent, finished() resolves <50ms. Includes a binding-side fix in native/sea/src/{statement,connection}.rs to keep the kernel's parent Statement alive alongside the ExecutedStatement. Without this fix, Statement::Drop invalidates the produced ExecutedStatement via the kernel's ValidityFlag and every cancel/close on the resulting JS Statement throws InvalidStatementHandle. Required because the operation feature's 200ms cancel acceptance is unreachable otherwise. --- lib/sea/SeaOperationBackend.ts | 201 ++++++++ lib/sea/SeaOperationLifecycle.ts | 285 +++++++++++ native/sea/src/connection.rs | 6 +- native/sea/src/statement.rs | 83 +++- tests/e2e/sea/operation-lifecycle-e2e.test.ts | 285 +++++++++++ tests/unit/sea/operation-lifecycle.test.ts | 445 ++++++++++++++++++ 6 files changed, 1288 insertions(+), 17 deletions(-) create mode 100644 lib/sea/SeaOperationBackend.ts create mode 100644 lib/sea/SeaOperationLifecycle.ts create mode 100644 tests/e2e/sea/operation-lifecycle-e2e.test.ts create mode 100644 tests/unit/sea/operation-lifecycle.test.ts diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts new file mode 100644 index 00000000..5ddfbbd5 --- /dev/null +++ b/lib/sea/SeaOperationBackend.ts @@ -0,0 +1,201 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * `IOperationBackend` implementation for the SEA path. + * + * Round 1 (impl-operation) lands the **lifecycle** methods: + * - `cancel()` — forwards to napi `Statement.cancel()`, + * - `close()` — forwards to napi `Statement.close()`, + * - `finished({progress, callback})` — M0 no-op (kernel + * `Statement::execute().await` already blocks until the statement + * is in a terminal state, so by the time we have a `Statement` + * handle the operation has already finished). + * + * Fetch / metadata / status methods are intentionally stubbed; they + * are owned by the parallel impl-results feature on + * `~/databricks-sql-nodejs-sea-WT/results` (`sea-results`). At + * integration time impl-results either: + * (a) replaces this file with its own SeaOperationBackend that + * imports `SeaOperationLifecycle` for cancel/close/finished, or + * (b) extends the fetch methods here while keeping the lifecycle + * methods unchanged. + * Both directions land cleanly with this file shape — the lifecycle + * helpers are factored out into `SeaOperationLifecycle.ts` so neither + * branch's diff touches the other's call sites. + */ + +import { v4 as uuid } from 'uuid'; +import { + TGetOperationStatusResp, + TGetResultSetMetadataResp, + TOperationState, + TStatusCode, +} from '../../thrift/TCLIService_types'; +import IOperationBackend from '../contracts/IOperationBackend'; +import IClientContext from '../contracts/IClientContext'; +import Status from '../dto/Status'; +import HiveDriverError from '../errors/HiveDriverError'; +import { + SeaStatementHandle, + SeaOperationLifecycleState, + createLifecycleState, + seaCancel, + seaClose, + seaFinished, + failIfNotActive, +} from './SeaOperationLifecycle'; + +interface SeaOperationBackendOptions { + /** + * The napi `Statement` handle returned by + * `Connection.executeStatement(...)`. Typed structurally so unit + * tests can hand in a mock without loading the native binding. + */ + statement: SeaStatementHandle; + /** + * Driver-level context — used by lifecycle helpers for logging. + */ + context: IClientContext; + /** + * Optional operation id. The kernel doesn't expose a stable + * statement-id string yet (it's an internal `Uuid` on the + * `ExecutedStatementHandle` trait but not surfaced through the + * napi binding's public d.ts). For M0 we synthesize a client-side + * UUID so the public `id` getter on `DBSQLOperation` returns + * something stable; impl-results will swap this out for the + * kernel-provided id once the binding exposes it. + */ + id?: string; +} + +const NOT_IMPLEMENTED_FETCH = + 'SEA result fetching is owned by the parallel impl-results feature ' + + '(branch sea-results) and not wired in this commit.'; + +export default class SeaOperationBackend implements IOperationBackend { + private readonly statement: SeaStatementHandle; + + private readonly context: IClientContext; + + private readonly _id: string; + + private readonly lifecycle: SeaOperationLifecycleState = createLifecycleState(); + + constructor({ statement, context, id }: SeaOperationBackendOptions) { + this.statement = statement; + this.context = context; + this._id = id ?? uuid(); + } + + public get id(): string { + return this._id; + } + + /** + * SEA always returns row results from `executeStatement` (the + * kernel doesn't surface a separate "no result set" path the way + * Thrift does via `TOperationHandle.hasResultSet`). For M0 we + * report `true` so the public-facade `DBSQLOperation.fetchChunk` + * proceeds to call through. DDL statements (which produce no + * rows) will be handled by impl-results returning an empty Arrow + * batch from `fetchNextBatch`. + */ + public get hasResultSet(): boolean { + return true; + } + + /** + * Pre-flight gate used by every fetch* method. Exposed as + * protected so impl-results can call it without re-importing + * from `SeaOperationLifecycle`. + */ + protected ensureActive(): void { + failIfNotActive(this.lifecycle); + } + + // ------------------------------------------------------------------ + // Fetch / metadata / status — stubs owned by impl-results. + // ------------------------------------------------------------------ + + public async fetchChunk(_options: { + limit: number; + disableBuffering?: boolean; + }): Promise> { + // Active-state gate is still meaningful here so cancel-mid-fetch + // tests can drive against this stub: a fetch issued after cancel + // throws the cancelled error rather than the not-implemented one. + this.ensureActive(); + throw new HiveDriverError(NOT_IMPLEMENTED_FETCH); + } + + public async hasMore(): Promise { + this.ensureActive(); + throw new HiveDriverError(NOT_IMPLEMENTED_FETCH); + } + + public async status(_progress: boolean): Promise { + // For M0 the operation is always finished by the time we have a + // Statement handle. Synthesize a FINISHED_STATE response so any + // facade-level callers (`DBSQLOperation.status`, public surface + // via `IOperation.status`) get a sensible answer without + // throwing. Richer status fields (numModifiedRows, displayMessage, + // progressUpdateResponse) defer to M1. + if (this.lifecycle.isCancelled) { + return { + status: { statusCode: TStatusCode.SUCCESS_STATUS }, + operationState: TOperationState.CANCELED_STATE, + } as TGetOperationStatusResp; + } + if (this.lifecycle.isClosed) { + return { + status: { statusCode: TStatusCode.SUCCESS_STATUS }, + operationState: TOperationState.CLOSED_STATE, + } as TGetOperationStatusResp; + } + return { + status: { statusCode: TStatusCode.SUCCESS_STATUS }, + operationState: TOperationState.FINISHED_STATE, + } as TGetOperationStatusResp; + } + + public async getResultMetadata(): Promise { + this.ensureActive(); + throw new HiveDriverError(NOT_IMPLEMENTED_FETCH); + } + + // ------------------------------------------------------------------ + // Lifecycle — owned by impl-operation. + // ------------------------------------------------------------------ + + public async waitUntilReady(options?: { + progress?: boolean; + callback?: (progress: TGetOperationStatusResp) => unknown; + }): Promise { + // `IOperationBackend.waitUntilReady` is the polling-loop entry on + // Thrift (`ThriftOperationBackend.waitUntilReady`); for SEA M0 it + // shares the implementation with `finished()` because both have + // the same semantics here (the operation is already in a terminal + // state by the time we have a Statement handle). + return seaFinished(this.lifecycle, options); + } + + public async cancel(): Promise { + return seaCancel(this.lifecycle, this.statement, this.context, this._id); + } + + public async close(): Promise { + return seaClose(this.lifecycle, this.statement, this.context, this._id); + } +} diff --git a/lib/sea/SeaOperationLifecycle.ts b/lib/sea/SeaOperationLifecycle.ts new file mode 100644 index 00000000..3022c0a7 --- /dev/null +++ b/lib/sea/SeaOperationLifecycle.ts @@ -0,0 +1,285 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * SEA operation lifecycle helpers (M0). + * + * The three methods exposed here (`cancel`, `close`, `finished`) are + * standalone functions that the `SeaOperationBackend` implementation + * delegates to. Keeping them in this dedicated file lets the parallel + * impl-results work (which owns the fetch-* methods on + * `SeaOperationBackend`) land independently — at merge time it can + * either import these helpers from here or inline them, with no + * conflicts on the call sites. + * + * Mapping to the existing `DBSQLOperation` semantics: + * - `cancel()` → ` driver.cancelOperation(...)` on Thrift today + * (`lib/DBSQLOperation.ts:241-259`). For SEA this is a one-shot + * forward to the napi `Statement.cancel()` which in turn calls + * `ExecutedStatementHandle::cancel(&self).await` in the kernel. + * - `close()` → `driver.closeOperation(...)` on Thrift today + * (`lib/DBSQLOperation.ts:265-284`). For SEA this is the napi + * `Statement.close()` which awaits the server-side delete. + * - `finished({progress, callback})` → the 100ms polling loop in + * `DBSQLOperation.waitUntilReady` today (`lib/DBSQLOperation.ts:337-391`). + * For M0 the kernel's `Statement::execute().await` already blocks + * until the statement is in a terminal state, so by the time the JS + * side has an `ExecutedStatement` (and therefore a binding-level + * `Statement`) the underlying operation is already finished. The + * M0 implementation here therefore resolves immediately, optionally + * firing the progress callback once with a synthesized "finished" + * response so callers that wire a progress UI still see a single + * completion tick. + */ + +import { + TGetOperationStatusResp, + TOperationState, + TStatusCode, +} from '../../thrift/TCLIService_types'; +import Status from '../dto/Status'; +import { LogLevel } from '../contracts/IDBSQLLogger'; +import IClientContext from '../contracts/IClientContext'; +import { mapKernelErrorToJsError, KernelErrorShape } from './SeaErrorMapping'; + +/** + * Minimal shape of the napi `Statement` that the lifecycle helpers + * depend on. Declared structurally so unit tests can hand in a mock + * without pulling the real native binding into the test process. + * + * The real binding's `Statement` (see `native/sea/index.d.ts`) has + * additional methods (`fetchNextBatch`, `schema`) which the lifecycle + * helpers deliberately don't touch — those belong to the results + * feature's surface. + */ +export interface SeaStatementHandle { + cancel(): Promise; + close(): Promise; +} + +/** + * Internal lifecycle state shared between the operation backend and + * these helpers. `SeaOperationBackend` keeps an instance of this and + * passes it to each helper call. Centralising the flags here means + * the helpers stay pure (no `this`) and the backend stays + * straightforward. + */ +export interface SeaOperationLifecycleState { + /** True once `cancel()` has succeeded — subsequent fetch* must throw. */ + isCancelled: boolean; + /** True once `close()` has been called (idempotent). */ + isClosed: boolean; +} + +/** + * Factory for a fresh lifecycle-state record. Helps keep test setup + * tidy. + */ +export function createLifecycleState(): SeaOperationLifecycleState { + return { isCancelled: false, isClosed: false }; +} + +/** + * Normalise an error thrown by the napi `Statement` into one of the + * driver's typed error classes. The binding surfaces kernel errors as + * a JSON envelope on `napi::Error.reason` with the sentinel prefix + * `__databricks_error__:` (see the napi-binding round 2 findings, + * section "JSON-envelope error reason"). If we can parse out a kernel + * payload, we route it through `mapKernelErrorToJsError`; otherwise + * the original error is rethrown unchanged. + */ +function rethrowKernelError(err: unknown): never { + if (err instanceof Error && typeof err.message === 'string') { + const sentinel = '__databricks_error__:'; + const idx = err.message.indexOf(sentinel); + if (idx >= 0) { + const json = err.message.slice(idx + sentinel.length); + let parsed: KernelErrorShape | undefined; + try { + parsed = JSON.parse(json) as KernelErrorShape; + } catch { + // Malformed envelope — fall through and rethrow the original + // below; we never silently drop a kernel error. + parsed = undefined; + } + if (parsed) { + throw mapKernelErrorToJsError(parsed); + } + } + } + throw err; +} + +/** + * Cancel an in-flight SEA operation. + * + * Mirrors `DBSQLOperation.cancel` semantics + * (`lib/DBSQLOperation.ts:241-259`): + * - idempotent: returns success if already cancelled or closed + * (no-ops are not bubbled to the kernel because the binding's + * `Statement::cancel` already treats already-finished statements as + * a no-op, but we still want to avoid a network round-trip here), + * - sets the cancelled flag _before_ awaiting the napi call so that a + * concurrent `fetchChunk()` observing the flag short-circuits as + * soon as the await yields (matches the Thrift flag-set ordering + * at `lib/DBSQLOperation.ts:254`), + * - returns a `Status.success()` on success (no rich Thrift status + * payload is available from the kernel side). + */ +export async function seaCancel( + state: SeaOperationLifecycleState, + statement: SeaStatementHandle, + context: IClientContext, + operationId: string, +): Promise { + if (state.isCancelled || state.isClosed) { + return Status.success(); + } + + context + .getLogger() + .log(LogLevel.debug, `Cancelling SEA operation with id: ${operationId}`); + + state.isCancelled = true; + + try { + await statement.cancel(); + } catch (err) { + rethrowKernelError(err); + } + + return Status.success(); +} + +/** + * Close a SEA operation. + * + * Mirrors `DBSQLOperation.close` semantics + * (`lib/DBSQLOperation.ts:265-284`) without the Thrift-only + * direct-results-prefetch optimisation: + * - idempotent: a second call is a no-op, + * - awaits the binding's `Statement::close` (which goes through to + * the kernel's `delete_statement` RPC), + * - sets the closed flag _before_ awaiting so a concurrent fetch + * sees the closed state as soon as the await yields. + */ +export async function seaClose( + state: SeaOperationLifecycleState, + statement: SeaStatementHandle, + context: IClientContext, + operationId: string, +): Promise { + if (state.isClosed) { + return Status.success(); + } + + context + .getLogger() + .log(LogLevel.debug, `Closing SEA operation with id: ${operationId}`); + + state.isClosed = true; + + try { + await statement.close(); + } catch (err) { + rethrowKernelError(err); + } + + return Status.success(); +} + +/** + * Synthesize a `TGetOperationStatusResp` shaped object reporting the + * "finished" state. The kernel doesn't surface a Thrift-shaped status + * struct, but `IOperation.finished({progress, callback})` is public + * surface and the callback signature expects this exact shape (see + * `lib/contracts/IOperation.ts:5` `OperationStatusCallback`). For M0 + * we report `FINISHED_STATE` with a success status. Richer fields + * (`numModifiedRows`, `progressUpdateResponse`, `displayMessage`) + * defer to M1 per the operation feature plan. + */ +function synthesizeFinishedStatus(): TGetOperationStatusResp { + return { + status: { + statusCode: TStatusCode.SUCCESS_STATUS, + }, + operationState: TOperationState.FINISHED_STATE, + } as TGetOperationStatusResp; +} + +/** + * `IOperation.finished({progress, callback})` M0 implementation. + * + * The Thrift implementation is a 100ms polling loop over + * `getOperationStatus` (`lib/DBSQLOperation.ts:337-391`). For SEA M0, + * the kernel's `Statement::execute().await` already blocks until the + * statement reaches a terminal state — by the time the JS layer has + * a `Statement` handle, the operation has already finished. + * + * Therefore the M0 implementation resolves immediately. If the + * caller supplied a progress callback we still invoke it once (a + * single completion tick) so progress-UI consumers see the same + * "operation is now finished" signal they'd get from the polling + * Thrift path — just without the intermediate `RUNNING_STATE` + * notifications. + * + * If the operation is already cancelled or closed, this is a no-op + * (matches the Thrift `failIfClosed` / cancelled-state semantics + * without throwing; throwing is the responsibility of subsequent + * fetch calls). + */ +export async function seaFinished( + state: SeaOperationLifecycleState, + options?: { + progress?: boolean; + callback?: (progress: TGetOperationStatusResp) => unknown; + }, +): Promise { + if (state.isCancelled || state.isClosed) { + return; + } + + if (options?.callback) { + const response = synthesizeFinishedStatus(); + // Await the callback in case it returns a promise — matches the + // Thrift code path at `lib/DBSQLOperation.ts:348-351`. + await Promise.resolve(options.callback(response)); + } +} + +/** + * Pre-flight check used by fetch* methods on `SeaOperationBackend`. + * If the operation has been cancelled or closed, throws the same + * `HiveDriverError`-shaped failure that `DBSQLOperation.failIfClosed` + * raises today (`lib/DBSQLOperation.ts:328-335`), via the kernel + * error mapping so the SQLSTATE / message conventions stay + * consistent. + * + * Exported so impl-results can call it at the top of every fetch + * call without duplicating the if/throw logic. + */ +export function failIfNotActive(state: SeaOperationLifecycleState): void { + if (state.isCancelled) { + throw mapKernelErrorToJsError({ + code: 'Cancelled', + message: 'The operation was cancelled.', + }); + } + if (state.isClosed) { + throw mapKernelErrorToJsError({ + code: 'InvalidStatementHandle', + message: 'The operation was closed.', + }); + } +} diff --git a/native/sea/src/connection.rs b/native/sea/src/connection.rs index 4afbd724..f5aa108f 100644 --- a/native/sea/src/connection.rs +++ b/native/sea/src/connection.rs @@ -101,7 +101,11 @@ impl Connection { } let executed = stmt.execute().await.map_err(napi_err_from_kernel)?; - Ok(Statement::from_executed(executed)) + // Pass the parent kernel `Statement` into the JS wrapper. + // Dropping it here would invalidate the executed handle + // via the shared ValidityFlag — see `StatementInner` docs + // in `statement.rs` for the rationale. + Ok(Statement::from_executed(stmt, executed)) }) .await } diff --git a/native/sea/src/statement.rs b/native/sea/src/statement.rs index 6d7b8761..b0bb0c6b 100644 --- a/native/sea/src/statement.rs +++ b/native/sea/src/statement.rs @@ -29,13 +29,41 @@ use std::sync::Arc; use tokio::sync::Mutex; use arrow_ipc::writer::StreamWriter; -use databricks_sql_kernel::{ExecutedStatement, ExecutedStatementHandle, ResultBatch}; +use databricks_sql_kernel::{ + ExecutedStatement, ExecutedStatementHandle, ResultBatch, + Statement as KernelStatement, +}; use crate::error::napi_err_from_kernel; use crate::result::{ArrowBatch, ArrowSchema}; use crate::runtime; use crate::util::guarded; +/// Inner state for the JS-visible `Statement` wrapper. +/// +/// **Why we also hold the parent kernel `Statement`:** the kernel's +/// `Statement::Drop` (`src/statement/mutable.rs:341-361`) invalidates +/// its produced `ExecutedStatement` via the shared `ValidityFlag`. If +/// the binding wrapper held only the `ExecutedStatement`, the parent +/// kernel `Statement` constructed in `Connection::execute_statement` +/// would drop at the end of that function, flipping the validity flag +/// and rendering every subsequent `cancel()` / `close()` call against +/// the executed handle a no-op that throws +/// `InvalidStatementHandle("executed-statement handle has been +/// invalidated by a re-execute on its parent Statement")`. +/// +/// Keeping the kernel `Statement` alive in the same `Arc>` +/// as the `ExecutedStatement` ties their lifetimes together — the +/// validity flag stays set for as long as JS holds the wrapper. +struct StatementInner { + /// The kernel's `Statement` (mutable). Kept alive solely to + /// prevent its `Drop` from invalidating `executed`. + _parent: KernelStatement, + /// The executed handle — drives `fetchNextBatch` / `cancel` / + /// `close`. + executed: ExecutedStatement, +} + /// Opaque executed-statement handle. /// /// `inner` is wrapped in `Arc>>` so: @@ -48,16 +76,21 @@ use crate::util::guarded; /// touching `&mut self` across an `await`. #[napi] pub struct Statement { - inner: Arc>>, + inner: Arc>>, } impl Statement { /// Crate-internal constructor — called from /// `Connection::execute_statement` once the kernel hands back the - /// `ExecutedStatement`. - pub(crate) fn from_executed(executed: ExecutedStatement) -> Self { + /// `ExecutedStatement`. The parent kernel `Statement` is passed in + /// alongside so its `Drop` doesn't fire while the JS side still + /// holds the executed handle (see `StatementInner` docs above). + pub(crate) fn from_executed(parent: KernelStatement, executed: ExecutedStatement) -> Self { Self { - inner: Arc::new(Mutex::new(Some(executed))), + inner: Arc::new(Mutex::new(Some(StatementInner { + _parent: parent, + executed, + }))), } } } @@ -73,9 +106,10 @@ impl Statement { let inner = Arc::clone(&self.inner); guarded(async move { let mut guard = inner.lock().await; - let executed = guard.as_mut().ok_or_else(|| { + let inner_state = guard.as_mut().ok_or_else(|| { napi::Error::new(napi::Status::InvalidArg, "statement already closed") })?; + let executed = &mut inner_state.executed; let stream = executed.result_stream_mut(); // Capture the schema before borrowing the next batch — we @@ -117,9 +151,10 @@ impl Statement { let inner = Arc::clone(&self.inner); guarded(async move { let guard = inner.lock().await; - let executed = guard.as_ref().ok_or_else(|| { + let inner_state = guard.as_ref().ok_or_else(|| { napi::Error::new(napi::Status::InvalidArg, "statement already closed") })?; + let executed = &inner_state.executed; let schema = executed.schema(); let bytes = encode_ipc_stream(&schema, None)?; Ok(ArrowSchema { @@ -135,10 +170,14 @@ impl Statement { let inner = Arc::clone(&self.inner); guarded(async move { let guard = inner.lock().await; - let executed = guard.as_ref().ok_or_else(|| { + let inner_state = guard.as_ref().ok_or_else(|| { napi::Error::new(napi::Status::InvalidArg, "statement already closed") })?; - executed.cancel().await.map_err(napi_err_from_kernel) + inner_state + .executed + .cancel() + .await + .map_err(napi_err_from_kernel) }) .await } @@ -149,14 +188,22 @@ impl Statement { pub async fn close(&self) -> napi::Result<()> { let inner = Arc::clone(&self.inner); guarded(async move { - // Take the handle out so `Drop` knows there's nothing left - // to clean up. - let executed = { + // Take the inner state out so `Drop` knows there's nothing + // left to clean up. The parent kernel `Statement`'s Drop + // runs here too — but only AFTER the executed handle's + // close has finished, so we still observe a clean + // server-side close result before the validity flag flips. + let inner_state = { let mut guard = inner.lock().await; guard.take() }; - if let Some(executed) = executed { - executed.close().await.map_err(napi_err_from_kernel)?; + if let Some(inner_state) = inner_state { + inner_state + .executed + .close() + .await + .map_err(napi_err_from_kernel)?; + // _parent drops here, after close has resolved. } Ok(()) }) @@ -171,12 +218,16 @@ impl Drop for Statement { }; let inner = Arc::clone(&self.inner); handle.spawn(async move { - // Drop the executed statement on the runtime. The kernel's + // Drop the inner state on the runtime. The kernel's // `ExecutedStatement::Drop` already spawns a fire-and-forget // `close_statement` against its own captured handle, so we // just need to ensure the value is dropped inside a tokio // context (the kernel's Drop reads `runtime_handle.clone()` - // and spawns; that handle is the same one we captured here). + // and spawns; that handle is the same one we captured + // here). The kernel `Statement` we kept alive in + // `_parent` drops at the same time, but it only owns + // local state (its Drop is non-async — see + // `src/statement/mutable.rs:341-361`). let _taken = { let mut guard = inner.lock().await; guard.take() diff --git a/tests/e2e/sea/operation-lifecycle-e2e.test.ts b/tests/e2e/sea/operation-lifecycle-e2e.test.ts new file mode 100644 index 00000000..0ebaa430 --- /dev/null +++ b/tests/e2e/sea/operation-lifecycle-e2e.test.ts @@ -0,0 +1,285 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * End-to-end tests for the SEA operation lifecycle (cancel / close / + * finished) wired through `SeaOperationBackend`. + * + * The impl-execution feature has not yet wired + * `DBSQLClient.connect({ useSEA: true })` to dispatch into + * `SeaBackend`, so this test drives the lifecycle by: + * 1. Calling the napi `openSession(...)` free function directly to + * get a kernel `Connection`. + * 2. Calling `connection.executeStatement(...)` to get a napi + * `Statement` handle. + * 3. Wrapping that handle in a `SeaOperationBackend` and exercising + * its `cancel()` / `close()` / `waitUntilReady()` methods. + * + * This mirrors how the eventual `SeaSessionBackend.executeStatement` + * call path will assemble the operation — we just inline the kernel + * call here since the session backend is being built in parallel. + * + * Path note: the original task spec referenced + * `tests/integration/sea/operation-lifecycle-e2e.test.ts`. The + * existing project structure uses `tests/e2e/**` (with its own + * `.mocharc.js`), so this file lives under `tests/e2e/sea/` to be + * picked up by `npm run e2e` automatically. + */ + +import { expect } from 'chai'; +import IClientContext from '../../../lib/contracts/IClientContext'; +import IDBSQLLogger, { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; +import { getSeaNative } from '../../../lib/sea/SeaNativeLoader'; +import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend'; +import OperationStateError, { + OperationStateErrorCode, +} from '../../../lib/errors/OperationStateError'; + +// Minimal binding type shapes (mirrors the napi `index.d.ts`). +interface NativeBinding { + openSession(opts: { + hostName: string; + httpPath: string; + token: string; + }): Promise; +} + +interface NativeConnection { + executeStatement( + sql: string, + options: { + initialCatalog?: string; + initialSchema?: string; + sessionConfig?: Record; + }, + ): Promise; + close(): Promise; +} + +interface NativeStatement { + fetchNextBatch(): Promise<{ ipcBytes: Buffer } | null>; + schema(): Promise<{ ipcBytes: Buffer }>; + cancel(): Promise; + close(): Promise; +} + +class NoopLogger implements IDBSQLLogger { + log(_level: LogLevel, _message: string): void { + // no-op for e2e runs + } +} + +function makeContext(): IClientContext { + const logger = new NoopLogger(); + const notUsed = () => { + throw new Error('IClientContext member not expected in lifecycle e2e'); + }; + return { + getConfig: notUsed, + getLogger: () => logger, + getConnectionProvider: notUsed, + getClient: notUsed, + getDriver: notUsed, + } as unknown as IClientContext; +} + +describe('SEA operation lifecycle — end-to-end', function suite() { + // Live-warehouse tests can take >2s through warm-up; bump the + // mocha default (2000ms) generously. The base `tests/e2e/.mocharc.js` + // already sets 300s but we keep this explicit so the file is robust + // when run via `npx mocha …` outside the e2e harness. + this.timeout(120_000); + + const hostName = + process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME || process.env.E2E_HOST; + const httpPath = + process.env.DATABRICKS_PECOTESTING_HTTP_PATH || process.env.E2E_PATH; + const token = + process.env.DATABRICKS_PECOTESTING_TOKEN_PERSONAL || process.env.E2E_ACCESS_TOKEN; + + before(function gate() { + if (!hostName || !httpPath || !token) { + // eslint-disable-next-line no-invalid-this + this.skip(); + } + }); + + it('cancel() succeeds against a live SEA statement and is fast', async () => { + const binding = getSeaNative() as unknown as NativeBinding; + + const connection = await binding.openSession({ + hostName: hostName as string, + httpPath: httpPath as string, + token: token as string, + }); + + let statement: NativeStatement | null = null; + try { + // Use a query that is long-enough running that cancel actually + // has work to do. `range(0, 100_000_000)` is large enough that + // even with kernel-side optimizations the server has not yet + // produced the full result by the time we cancel. + statement = await connection.executeStatement( + 'SELECT * FROM range(0, 100000000)', + {}, + ); + expect(statement).to.be.an('object'); + + const op = new SeaOperationBackend({ + statement: statement as unknown as NativeStatement, + context: makeContext(), + }); + + const t0 = Date.now(); + const status = await op.cancel(); + const elapsed = Date.now() - t0; + + // Cancel must complete within 200ms. + expect(elapsed).to.be.lessThan(200, `cancel latency ${elapsed}ms exceeds 200ms budget`); + expect(status.isSuccess).to.equal(true); + } finally { + // Bypass `op.close()` here because we want to verify cancel + // alone — close is exercised in the next test. + if (statement !== null) { + try { + await statement.close(); + } catch (_) { + // Cancelled statements may surface a close error from the + // server; ignore for cleanup. + } + } + await connection.close(); + } + }); + + it('cancel mid-fetch — subsequent fetchChunk throws OperationStateError', async () => { + const binding = getSeaNative() as unknown as NativeBinding; + + const connection = await binding.openSession({ + hostName: hostName as string, + httpPath: httpPath as string, + token: token as string, + }); + + let statement: NativeStatement | null = null; + try { + statement = await connection.executeStatement( + 'SELECT * FROM range(0, 100000000)', + {}, + ); + + const op = new SeaOperationBackend({ + statement: statement as unknown as NativeStatement, + context: makeContext(), + }); + + const t0 = Date.now(); + await op.cancel(); + const elapsed = Date.now() - t0; + expect(elapsed).to.be.lessThan(200, `cancel latency ${elapsed}ms exceeds 200ms budget`); + + // After cancel, fetchChunk must throw the cancellation error + // (regardless of whether the underlying fetch implementation + // is wired — the lifecycle gate runs first). + let thrown: unknown; + try { + await op.fetchChunk({ limit: 100 }); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(OperationStateError); + expect((thrown as OperationStateError).errorCode).to.equal( + OperationStateErrorCode.Canceled, + ); + } finally { + if (statement !== null) { + try { + await statement.close(); + } catch (_) { + // ignore cleanup error after cancel + } + } + await connection.close(); + } + }); + + it('close() succeeds against a SEA statement and is idempotent', async () => { + const binding = getSeaNative() as unknown as NativeBinding; + + const connection = await binding.openSession({ + hostName: hostName as string, + httpPath: httpPath as string, + token: token as string, + }); + + try { + const statement = await connection.executeStatement('SELECT 1', {}); + + const op = new SeaOperationBackend({ + statement: statement as unknown as NativeStatement, + context: makeContext(), + }); + + const status1 = await op.close(); + expect(status1.isSuccess).to.equal(true); + + // Idempotent — a second close is a no-op on the JS side and + // does not hit the binding (which would already have taken the + // inner handle). + const status2 = await op.close(); + expect(status2.isSuccess).to.equal(true); + } finally { + await connection.close(); + } + }); + + it('finished() resolves immediately and fires the progress callback', async () => { + const binding = getSeaNative() as unknown as NativeBinding; + + const connection = await binding.openSession({ + hostName: hostName as string, + httpPath: httpPath as string, + token: token as string, + }); + + let statement: NativeStatement | null = null; + try { + statement = await connection.executeStatement('SELECT 1', {}); + + const op = new SeaOperationBackend({ + statement: statement as unknown as NativeStatement, + context: makeContext(), + }); + + let ticks = 0; + const t0 = Date.now(); + await op.waitUntilReady({ + callback: () => { + ticks += 1; + }, + }); + const elapsed = Date.now() - t0; + + // M0 finished() is a no-op — must resolve in <50ms. + expect(elapsed).to.be.lessThan(50); + // Progress callback fires exactly once. + expect(ticks).to.equal(1); + } finally { + if (statement !== null) { + await statement.close(); + } + await connection.close(); + } + }); +}); diff --git a/tests/unit/sea/operation-lifecycle.test.ts b/tests/unit/sea/operation-lifecycle.test.ts new file mode 100644 index 00000000..86101687 --- /dev/null +++ b/tests/unit/sea/operation-lifecycle.test.ts @@ -0,0 +1,445 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * Unit tests for the SEA operation lifecycle (`cancel`, `close`, + * `finished`) — both via the `SeaOperationLifecycle` helpers and + * via `SeaOperationBackend` which composes them. + * + * We mock the napi binding's `Statement` handle so the test process + * doesn't touch any native code; the helpers and the backend are + * structurally typed against `SeaStatementHandle` exactly so this + * works. + */ + +import { expect } from 'chai'; +import sinon from 'sinon'; +import { + TOperationState, + TStatusCode, + TGetOperationStatusResp, +} from '../../../thrift/TCLIService_types'; +import IClientContext from '../../../lib/contracts/IClientContext'; +import IDBSQLLogger, { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; +import { + SeaStatementHandle, + createLifecycleState, + seaCancel, + seaClose, + seaFinished, + failIfNotActive, +} from '../../../lib/sea/SeaOperationLifecycle'; +import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend'; +import OperationStateError, { + OperationStateErrorCode, +} from '../../../lib/errors/OperationStateError'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; + +class TestLogger implements IDBSQLLogger { + public readonly entries: Array<{ level: LogLevel; message: string }> = []; + + log(level: LogLevel, message: string): void { + this.entries.push({ level, message }); + } +} + +function makeContext(): IClientContext { + const logger = new TestLogger(); + // Only `getLogger` is exercised by the lifecycle helpers; the rest + // of `IClientContext` is stubbed to throw so accidental coupling + // to it shows up loudly in tests. + const notUsed = () => { + throw new Error('IClientContext member not expected to be used by lifecycle'); + }; + return { + getConfig: notUsed, + getLogger: () => logger, + getConnectionProvider: notUsed, + getClient: notUsed, + getDriver: notUsed, + } as unknown as IClientContext; +} + +function makeStatement(overrides: Partial = {}): { + handle: SeaStatementHandle; + cancel: sinon.SinonStub; + close: sinon.SinonStub; +} { + const cancel = sinon.stub().resolves(); + const close = sinon.stub().resolves(); + return { + handle: { cancel, close, ...overrides }, + cancel, + close, + }; +} + +describe('SeaOperationLifecycle (helpers)', () => { + describe('seaCancel', () => { + it('calls statement.cancel() and resolves with a success Status', async () => { + const ctx = makeContext(); + const { handle, cancel } = makeStatement(); + const state = createLifecycleState(); + + const status = await seaCancel(state, handle, ctx, 'op-id-1'); + + expect(cancel.calledOnce).to.equal(true); + expect(status.isSuccess).to.equal(true); + expect(state.isCancelled).to.equal(true); + }); + + it('is idempotent — second call does not hit the binding', async () => { + const ctx = makeContext(); + const { handle, cancel } = makeStatement(); + const state = createLifecycleState(); + + await seaCancel(state, handle, ctx, 'op-id-2'); + await seaCancel(state, handle, ctx, 'op-id-2'); + + expect(cancel.calledOnce).to.equal(true); + }); + + it('short-circuits when the operation is already closed', async () => { + const ctx = makeContext(); + const { handle, cancel } = makeStatement(); + const state = createLifecycleState(); + state.isClosed = true; + + const status = await seaCancel(state, handle, ctx, 'op-id-3'); + + expect(cancel.called).to.equal(false); + expect(status.isSuccess).to.equal(true); + }); + + it('sets isCancelled BEFORE awaiting the binding (so concurrent fetch sees it)', async () => { + const ctx = makeContext(); + const state = createLifecycleState(); + + // Cancel returns a promise that resolves only when we say so. + let release: (() => void) | undefined; + const cancelPromise = new Promise((resolve) => { + release = resolve; + }); + const handle: SeaStatementHandle = { + cancel: () => cancelPromise, + close: async () => undefined, + }; + + const inflight = seaCancel(state, handle, ctx, 'op-id-4'); + + // Yield once so the synchronous prelude of seaCancel runs. + await Promise.resolve(); + expect(state.isCancelled).to.equal(true); + // Before the await resolves, failIfNotActive must already throw. + expect(() => failIfNotActive(state)).to.throw(); + + release!(); + const status = await inflight; + expect(status.isSuccess).to.equal(true); + }); + + it('propagates binding errors via the kernel error mapping', async () => { + const ctx = makeContext(); + const state = createLifecycleState(); + const handle: SeaStatementHandle = { + cancel: async () => { + // Simulate the binding's JSON-envelope error format. + const payload = JSON.stringify({ + code: 'InvalidStatementHandle', + message: 'statement already closed', + }); + throw new Error(`__databricks_error__:${payload}`); + }, + close: async () => undefined, + }; + + let thrown: unknown; + try { + await seaCancel(state, handle, ctx, 'op-err-1'); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.contain('statement already closed'); + }); + + it('logs a debug message tagged with the operation id', async () => { + const ctx = makeContext(); + const logger = ctx.getLogger() as TestLogger; + const { handle } = makeStatement(); + const state = createLifecycleState(); + + await seaCancel(state, handle, ctx, 'op-id-log'); + + expect( + logger.entries.some( + (e) => e.level === LogLevel.debug && e.message.includes('op-id-log'), + ), + ).to.equal(true); + }); + }); + + describe('seaClose', () => { + it('calls statement.close() and resolves with a success Status', async () => { + const ctx = makeContext(); + const { handle, close } = makeStatement(); + const state = createLifecycleState(); + + const status = await seaClose(state, handle, ctx, 'op-close-1'); + + expect(close.calledOnce).to.equal(true); + expect(status.isSuccess).to.equal(true); + expect(state.isClosed).to.equal(true); + }); + + it('is idempotent — second call does not hit the binding', async () => { + const ctx = makeContext(); + const { handle, close } = makeStatement(); + const state = createLifecycleState(); + + await seaClose(state, handle, ctx, 'op-close-2'); + await seaClose(state, handle, ctx, 'op-close-2'); + + expect(close.calledOnce).to.equal(true); + }); + + it('propagates binding errors via the kernel error mapping', async () => { + const ctx = makeContext(); + const state = createLifecycleState(); + const handle: SeaStatementHandle = { + cancel: async () => undefined, + close: async () => { + const payload = JSON.stringify({ + code: 'NetworkError', + message: 'connection reset by peer', + }); + throw new Error(`__databricks_error__:${payload}`); + }, + }; + + let thrown: unknown; + try { + await seaClose(state, handle, ctx, 'op-err-close'); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.contain('connection reset'); + }); + }); + + describe('seaFinished', () => { + it('resolves immediately when no callback is provided (M0 no-op)', async () => { + const state = createLifecycleState(); + const start = Date.now(); + await seaFinished(state); + // Should be near-instantaneous — no 100ms poll. + expect(Date.now() - start).to.be.lessThan(50); + }); + + it('invokes the progress callback exactly once with a FINISHED status', async () => { + const state = createLifecycleState(); + const callback = sinon.stub(); + + await seaFinished(state, { callback }); + + expect(callback.calledOnce).to.equal(true); + const arg = callback.firstCall.args[0] as TGetOperationStatusResp; + expect(arg.operationState).to.equal(TOperationState.FINISHED_STATE); + expect(arg.status?.statusCode).to.equal(TStatusCode.SUCCESS_STATUS); + }); + + it('awaits an async progress callback', async () => { + const state = createLifecycleState(); + let resolvedInsideCallback = false; + const callback = async () => { + await new Promise((r) => setTimeout(r, 10)); + resolvedInsideCallback = true; + }; + + await seaFinished(state, { callback }); + + expect(resolvedInsideCallback).to.equal(true); + }); + + it('is a no-op when the operation is already cancelled', async () => { + const state = createLifecycleState(); + state.isCancelled = true; + const callback = sinon.stub(); + + await seaFinished(state, { callback }); + + expect(callback.called).to.equal(false); + }); + }); + + describe('failIfNotActive', () => { + it('throws OperationStateError(Canceled) when cancelled', () => { + const state = createLifecycleState(); + state.isCancelled = true; + // The kernel-error mapping routes Cancelled → OperationStateError. + try { + failIfNotActive(state); + expect.fail('expected throw'); + } catch (err) { + expect(err).to.be.instanceOf(OperationStateError); + expect((err as OperationStateError).errorCode).to.equal( + OperationStateErrorCode.Canceled, + ); + } + }); + + it('throws HiveDriverError when closed', () => { + const state = createLifecycleState(); + state.isClosed = true; + try { + failIfNotActive(state); + expect.fail('expected throw'); + } catch (err) { + expect(err).to.be.instanceOf(HiveDriverError); + } + }); + + it('does nothing when active', () => { + const state = createLifecycleState(); + // Should not throw. + failIfNotActive(state); + }); + }); +}); + +describe('SeaOperationBackend (lifecycle integration)', () => { + it('cancel() forwards to statement.cancel()', async () => { + const ctx = makeContext(); + const { handle, cancel } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + const status = await op.cancel(); + + expect(cancel.calledOnce).to.equal(true); + expect(status.isSuccess).to.equal(true); + }); + + it('close() forwards to statement.close()', async () => { + const ctx = makeContext(); + const { handle, close } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + const status = await op.close(); + + expect(close.calledOnce).to.equal(true); + expect(status.isSuccess).to.equal(true); + }); + + it('finished() resolves immediately and fires the callback once', async () => { + const ctx = makeContext(); + const { handle } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + const responses: TGetOperationStatusResp[] = []; + const start = Date.now(); + await op.waitUntilReady({ callback: (r) => responses.push(r) }); + + expect(Date.now() - start).to.be.lessThan(50); + expect(responses).to.have.length(1); + expect(responses[0].operationState).to.equal(TOperationState.FINISHED_STATE); + }); + + it('fetchChunk after cancel throws the cancellation error', async () => { + const ctx = makeContext(); + const { handle } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + await op.cancel(); + + let thrown: unknown; + try { + await op.fetchChunk({ limit: 10 }); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(OperationStateError); + expect((thrown as OperationStateError).errorCode).to.equal( + OperationStateErrorCode.Canceled, + ); + }); + + it('cancel() is idempotent across the backend surface', async () => { + const ctx = makeContext(); + const { handle, cancel } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + await op.cancel(); + await op.cancel(); + await op.cancel(); + + expect(cancel.calledOnce).to.equal(true); + }); + + it('close() is idempotent across the backend surface', async () => { + const ctx = makeContext(); + const { handle, close } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + await op.close(); + await op.close(); + + expect(close.calledOnce).to.equal(true); + }); + + it('status() reports FINISHED_STATE when active', async () => { + const ctx = makeContext(); + const { handle } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + const status = await op.status(false); + expect(status.operationState).to.equal(TOperationState.FINISHED_STATE); + }); + + it('status() reports CANCELED_STATE after cancel', async () => { + const ctx = makeContext(); + const { handle } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + await op.cancel(); + const status = await op.status(false); + expect(status.operationState).to.equal(TOperationState.CANCELED_STATE); + }); + + it('id getter is stable', () => { + const ctx = makeContext(); + const { handle } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx, id: 'fixed-id' }); + + expect(op.id).to.equal('fixed-id'); + expect(op.id).to.equal('fixed-id'); + }); + + it('id getter defaults to a uuid when none is supplied', () => { + const ctx = makeContext(); + const { handle } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + // RFC4122 v4 — 36 chars with hyphens at positions 8/13/18/23. + expect(op.id).to.match(/^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$/); + }); + + it('hasResultSet is true by default (kernel always streams)', () => { + const ctx = makeContext(); + const { handle } = makeStatement(); + const op = new SeaOperationBackend({ statement: handle, context: ctx }); + + expect(op.hasResultSet).to.equal(true); + }); +}); From 130b4833b9f83f356e9de301a3bb22b284b0d08b Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 01:47:07 +0000 Subject: [PATCH 08/11] sea-execution: executeStatement + openSession via SeaSessionBackend SeaSessionBackend wraps the napi Connection handle. executeStatement passes through to napi.executeStatement and returns an IOperationBackend (SeaOperationBackend in sea-results feature). Session config + initialCatalog/initialSchema flow to napi openSession. M0 stops at executeStatement; metadata methods + per-stmt overrides defer to M1. No new dependencies. Reuses existing ConnectionOptions / Session config shapes. Co-authored-by: Isaac --- lib/DBSQLClient.ts | 2 +- lib/sea/SeaBackend.ts | 143 +++++- lib/sea/SeaNativeLoader.ts | 59 ++- lib/sea/SeaOperationBackend.ts | 214 +++++++++ lib/sea/SeaSessionBackend.ts | 228 ++++++++++ tests/integration/sea/execution-e2e.test.ts | 122 ++++++ tests/unit/sea/execution.test.ts | 462 ++++++++++++++++++++ 7 files changed, 1212 insertions(+), 18 deletions(-) create mode 100644 lib/sea/SeaOperationBackend.ts create mode 100644 lib/sea/SeaSessionBackend.ts create mode 100644 tests/integration/sea/execution-e2e.test.ts create mode 100644 tests/unit/sea/execution.test.ts diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index 139d5f4e..25b438a6 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -238,7 +238,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.connectionProvider = this.createConnectionProvider(options); this.backend = options.useSEA - ? new SeaBackend() + ? new SeaBackend({ context: this }) : new ThriftBackend({ context: this, onConnectionEvent: (event, payload) => this.forwardConnectionEvent(event, payload), diff --git a/lib/sea/SeaBackend.ts b/lib/sea/SeaBackend.ts index 5815dc05..dbd9f627 100644 --- a/lib/sea/SeaBackend.ts +++ b/lib/sea/SeaBackend.ts @@ -1,18 +1,149 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + import IBackend from '../contracts/IBackend'; import ISessionBackend from '../contracts/ISessionBackend'; +import IClientContext from '../contracts/IClientContext'; +import { ConnectionOptions, OpenSessionRequest } from '../contracts/IDBSQLClient'; +import HiveDriverError from '../errors/HiveDriverError'; +import { + getSeaNative, + SeaNativeBinding, + SeaNativeConnection, +} from './SeaNativeLoader'; +import { mapKernelErrorToJsError, KernelErrorShape } from './SeaErrorMapping'; +import SeaSessionBackend from './SeaSessionBackend'; + +/** + * Sentinel string the napi binding uses on `Error.reason` JSON envelopes. + * Keep in sync with `native/sea/src/error.rs` (`SENTINEL`). + */ +const KERNEL_ERROR_SENTINEL = '__databricks_error__:'; + +function rethrowKernelError(err: unknown): never { + if (err && typeof err === 'object' && 'message' in err) { + const reason = (err as { reason?: unknown }).reason; + if (typeof reason === 'string' && reason.startsWith(KERNEL_ERROR_SENTINEL)) { + try { + const payload = JSON.parse(reason.slice(KERNEL_ERROR_SENTINEL.length)) as KernelErrorShape; + throw mapKernelErrorToJsError(payload); + } catch (parseErr) { + if (parseErr !== err) { + throw parseErr; + } + } + } + } + throw err; +} -const NOT_IMPLEMENTED = 'SEA backend not implemented yet — wired in sea-napi-binding feature'; +export interface SeaBackendOptions { + context: IClientContext; + /** + * Optional injection seam for unit tests. When provided, replaces the + * default `getSeaNative()` call so tests can swap in a mock napi + * binding without loading the `.node` artifact. + */ + nativeBinding?: SeaNativeBinding; +} +/** + * SEA-backed implementation of `IBackend`. + * + * **M0 dispatch model:** the napi binding's `openSession()` already + * builds a kernel `Session` from PAT + hostname + httpPath, so there is + * no "connect" round-trip before `openSession` — `connect()` only + * captures the `ConnectionOptions` and validates that PAT auth is in + * use. The actual session open happens inside `openSession()`. + * + * **Why we don't use IClientContext's connectionProvider here:** that + * provider is the Thrift HTTP transport. The kernel owns its own + * reqwest+rustls stack inside the native binding, so there is no + * NodeJS-level connection state to manage on the SEA path. The + * `IClientContext` is still useful for logger + config access. + */ export default class SeaBackend implements IBackend { - public async connect(): Promise { - throw new Error(NOT_IMPLEMENTED); + private readonly context: IClientContext; + + private readonly binding: SeaNativeBinding; + + private connectionOptions?: ConnectionOptions; + + constructor(options?: SeaBackendOptions) { + this.context = options?.context as IClientContext; + this.binding = options?.nativeBinding ?? getSeaNative(); + } + + public async connect(options: ConnectionOptions): Promise { + // M0 supports PAT only. OAuth flows defer to the M1 auth feature. + // We validate at connect-time so misconfigured callers fail fast + // rather than at openSession-time. + if (options.authType !== undefined && options.authType !== 'access-token') { + throw new HiveDriverError( + `SEA backend (M0) supports authType: 'access-token' only — received '${options.authType}'. ` + + `OAuth flows defer to M1.`, + ); + } + // The `token` field lives on the `access-token` arm of the + // ConnectionOptions discriminated union. Narrow via `authType` check. + const token = (options as { token?: string }).token; + if (!token) { + throw new HiveDriverError('SEA backend: token is required for access-token auth'); + } + this.connectionOptions = options; } - public async openSession(): Promise { - throw new Error(NOT_IMPLEMENTED); + public async openSession(request: OpenSessionRequest): Promise { + if (!this.connectionOptions) { + throw new HiveDriverError('SeaBackend: not connected. Call connect() first.'); + } + + const { host, path } = this.connectionOptions; + const token = (this.connectionOptions as { token?: string }).token as string; + + let nativeConnection: SeaNativeConnection; + try { + nativeConnection = await this.binding.openSession({ + hostName: host, + httpPath: path, + token, + }); + } catch (err) { + rethrowKernelError(err); + } + + // Merge `request.configuration` (the existing public field for Spark + // conf) with any backend-specific session config. The SEA wire + // protocol applies these per-statement, but we capture them at + // session-open time and forward with every executeStatement to + // preserve session-config semantics. + const sessionConfig = request.configuration ? { ...request.configuration } : undefined; + + return new SeaSessionBackend({ + connection: nativeConnection!, + context: this.context, + defaults: { + initialCatalog: request.initialCatalog, + initialSchema: request.initialSchema, + sessionConfig, + }, + }); } public async close(): Promise { - throw new Error(NOT_IMPLEMENTED); + // No backend-level resources to release — each `SeaSessionBackend` + // owns its own napi `Connection` lifecycle. + this.connectionOptions = undefined; } } diff --git a/lib/sea/SeaNativeLoader.ts b/lib/sea/SeaNativeLoader.ts index c66cdf33..3058a21e 100644 --- a/lib/sea/SeaNativeLoader.ts +++ b/lib/sea/SeaNativeLoader.ts @@ -35,27 +35,64 @@ // eslint-disable-next-line @typescript-eslint/no-var-requires, import/no-dynamic-require, global-require const native = require('../../native/sea/index.js'); +/** + * JS-visible per-execute options carried over the napi binding boundary. + * Mirrors the `ExecuteOptions` shape generated by napi-rs into + * `native/sea/index.d.ts`. Re-declared here so the JS adapter layer + * isn't tied to the binding-generated types. + */ +export interface SeaExecuteOptions { + initialCatalog?: string; + initialSchema?: string; + sessionConfig?: Record; +} + +/** + * Arrow IPC payload returned by `Statement.fetchNextBatch()`. Carries a + * complete Arrow IPC stream (schema header + 1 record-batch message). + */ +export interface SeaArrowBatch { + ipcBytes: Buffer; +} + +/** + * Arrow IPC payload returned by `Statement.schema()` (schema header only). + */ +export interface SeaArrowSchema { + ipcBytes: Buffer; +} + +/** + * Typed surface for the opaque napi `Statement` handle. Method signatures + * match `native/sea/index.d.ts` exactly so the JS-side wrappers can + * `await` them without `any` casts. + */ +export interface SeaNativeStatement { + fetchNextBatch(): Promise; + schema(): Promise; + cancel(): Promise; + close(): Promise; +} + +/** + * Typed surface for the opaque napi `Connection` handle. + */ +export interface SeaNativeConnection { + executeStatement(sql: string, options: SeaExecuteOptions): Promise; + close(): Promise; +} + /** * Public surface of the native binding exposed to the rest of the * NodeJS driver. Round 2 lands `openSession` + opaque `Connection` / * `Statement` classes (the binding-generated `.d.ts` is the source of * truth for their method signatures — see `native/sea/index.d.ts`). - * - * We deliberately keep this typed loosely (`unknown` for the class - * shapes) so the loader layer doesn't have to import the binding's - * generated types and the JS adapter layer can introduce its own - * higher-level wrappers without conflicting with the binding's TS - * declarations. */ export interface SeaNativeBinding { /** Returns the native crate version (smoke test for the binding's load path). */ version(): string; /** Open a session over PAT auth. Returns an opaque Connection. */ - openSession(opts: { - hostName: string; - httpPath: string; - token: string; - }): Promise; + openSession(opts: { hostName: string; httpPath: string; token: string }): Promise; /** Opaque Connection class — instance methods on the binding-generated d.ts. */ Connection: Function; /** Opaque Statement class — instance methods on the binding-generated d.ts. */ diff --git a/lib/sea/SeaOperationBackend.ts b/lib/sea/SeaOperationBackend.ts new file mode 100644 index 00000000..edae5c49 --- /dev/null +++ b/lib/sea/SeaOperationBackend.ts @@ -0,0 +1,214 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { v4 as uuidv4 } from 'uuid'; +import { TGetOperationStatusResp, TGetResultSetMetadataResp, TOperationState } from '../../thrift/TCLIService_types'; +import IOperationBackend from '../contracts/IOperationBackend'; +import IClientContext from '../contracts/IClientContext'; +import Status from '../dto/Status'; +import { SeaNativeStatement } from './SeaNativeLoader'; +import { mapKernelErrorToJsError, KernelErrorShape } from './SeaErrorMapping'; +import HiveDriverError from '../errors/HiveDriverError'; + +/** + * Constructor options for `SeaOperationBackend`. + * + * `statement` is the opaque napi `Statement` handle returned by + * `Connection.executeStatement(...)`. The kernel has already internalized + * async polling — by the time we hold a `Statement`, the SQL is at least + * accepted by the server. + * + * `id` is captured at construction so `IOperationBackend.id` can return a + * stable string without async work. The napi binding does not currently + * expose the server-side `statement_id`, so the M0 shim generates a + * synthetic UUIDv4. Once the binding surfaces the kernel statement id, + * this is the only line that needs to change. + */ +export interface SeaOperationBackendOptions { + statement: SeaNativeStatement; + context: IClientContext; + /** + * Optional override for `id`. When not provided a fresh UUIDv4 is used. + * Reserved for the sea-results / sea-integration features which may + * thread the kernel-side statement id through once the napi binding + * surfaces it. + */ + id?: string; +} + +/** + * Sentinel string the napi binding uses on `Error.reason` JSON envelopes. + * Keep in sync with `native/sea/src/error.rs` (`SENTINEL`). + */ +const KERNEL_ERROR_SENTINEL = '__databricks_error__:'; + +/** + * Inspect a thrown error from the napi binding. If it carries the + * sentinel-prefixed JSON envelope, parse and re-throw as the mapped JS + * driver error class; otherwise re-throw verbatim. + * + * Used by every method body that crosses the napi boundary so that + * kernel `ErrorCode` + SQLSTATE are preserved on the JS error surface. + */ +function rethrowKernelError(err: unknown): never { + if (err && typeof err === 'object' && 'message' in err) { + const reason = (err as { reason?: unknown }).reason; + if (typeof reason === 'string' && reason.startsWith(KERNEL_ERROR_SENTINEL)) { + try { + const payload = JSON.parse(reason.slice(KERNEL_ERROR_SENTINEL.length)) as KernelErrorShape; + throw mapKernelErrorToJsError(payload); + } catch (parseErr) { + // If JSON.parse failed, fall through to the raw error. The + // `parseErr` itself is the mapped error if we successfully threw above. + if (parseErr !== err) { + throw parseErr; + } + } + } + } + throw err; +} + +/** + * SEA-backed implementation of `IOperationBackend`. + * + * **M0 scope:** carries the napi `Statement` handle and supports + * `cancel()` + `close()` (both pass-through to the kernel). The + * row-fetch / status / result-metadata methods are owned by the + * `sea-results` feature — until that lands, calling them throws an + * explicit `M1`-deferred error so consumers fail loudly rather than + * silently. The `sea-integration` round will reconcile this shim with + * the real implementation from `sea-results`. + * + * **Why a thin shim now:** `sea-execution` (this feature) needs to + * return an `IOperationBackend` from `SeaSessionBackend.executeStatement` + * to keep the abstraction's type contract. Splitting the row-fetch + * implementation into `sea-results` lets the two features land + * independently in a stacked-PR workflow without one blocking the other. + */ +export default class SeaOperationBackend implements IOperationBackend { + private readonly statement: SeaNativeStatement; + + // Retained for symmetry with ThriftOperationBackend — logger access happens + // via `context.getLogger()`. The integration round will lean on this to + // emit per-operation lifecycle events. + // eslint-disable-next-line @typescript-eslint/no-unused-vars + private readonly context: IClientContext; + + private readonly _id: string; + + private closed = false; + + private cancelled = false; + + constructor({ statement, context, id }: SeaOperationBackendOptions) { + this.statement = statement; + this.context = context; + this._id = id ?? uuidv4(); + } + + public get id(): string { + return this._id; + } + + public get hasResultSet(): boolean { + // SEA's `Statement::execute` only returns a handle for successfully + // started statements; rows may be empty but the result-set channel is + // always available (the kernel's `ResultStream::next_batch` resolves + // to `None` when exhausted). M0 mirrors the JDBC SEA driver which + // treats every executed statement as result-set-bearing. + return true; + } + + /** + * Pull the next batch of rows. **Owned by sea-results.** Returning a + * deferred error here keeps the build green while the row-decoding + * pipeline (Arrow IPC → JS objects) lands separately. + */ + // eslint-disable-next-line @typescript-eslint/no-unused-vars + public async fetchChunk(_options: { limit: number; disableBuffering?: boolean }): Promise> { + throw new HiveDriverError( + 'SeaOperationBackend.fetchChunk: not implemented yet (lands in sea-results feature)', + ); + } + + public async hasMore(): Promise { + throw new HiveDriverError( + 'SeaOperationBackend.hasMore: not implemented yet (lands in sea-results feature)', + ); + } + + /** + * Wait until the operation reaches a terminal state. The kernel + * already internalises async polling inside `Statement::execute`, so + * by the time we hold a `Statement` handle the operation is at least + * RUNNING or FINISHED. M0 treats this as a no-op; the JDBC SEA driver + * does the same when the kernel has already absorbed the polling + * loop. The sea-results feature may override if status callbacks need + * to fire. + */ + // eslint-disable-next-line @typescript-eslint/no-unused-vars + public async waitUntilReady(_options?: { + progress?: boolean; + callback?: (progress: TGetOperationStatusResp) => unknown; + }): Promise { + // No-op — kernel has already polled to readiness internally. + } + + /** + * Single-shot status. M0 synthesises a "finished" response because the + * kernel surfaces only terminal-or-running statements through its + * public API. The sea-results feature will tighten this up with the + * real kernel `StatementStatus` mapping. + */ + // eslint-disable-next-line @typescript-eslint/no-unused-vars + public async status(_progress: boolean): Promise { + return { + status: { statusCode: 0 }, + operationState: TOperationState.FINISHED_STATE, + } as TGetOperationStatusResp; + } + + public async getResultMetadata(): Promise { + throw new HiveDriverError( + 'SeaOperationBackend.getResultMetadata: not implemented yet (lands in sea-results feature)', + ); + } + + public async cancel(): Promise { + if (this.cancelled || this.closed) { + return Status.success(); + } + try { + await this.statement.cancel(); + } catch (err) { + rethrowKernelError(err); + } + this.cancelled = true; + return Status.success(); + } + + public async close(): Promise { + if (this.closed) { + return Status.success(); + } + try { + await this.statement.close(); + } catch (err) { + rethrowKernelError(err); + } + this.closed = true; + return Status.success(); + } +} diff --git a/lib/sea/SeaSessionBackend.ts b/lib/sea/SeaSessionBackend.ts new file mode 100644 index 00000000..c475e040 --- /dev/null +++ b/lib/sea/SeaSessionBackend.ts @@ -0,0 +1,228 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { v4 as uuidv4 } from 'uuid'; +import ISessionBackend from '../contracts/ISessionBackend'; +import IOperationBackend from '../contracts/IOperationBackend'; +import IClientContext from '../contracts/IClientContext'; +import { + ExecuteStatementOptions, + TypeInfoRequest, + CatalogsRequest, + SchemasRequest, + TablesRequest, + TableTypesRequest, + ColumnsRequest, + FunctionsRequest, + PrimaryKeysRequest, + CrossReferenceRequest, +} from '../contracts/IDBSQLSession'; +import Status from '../dto/Status'; +import InfoValue from '../dto/InfoValue'; +import HiveDriverError from '../errors/HiveDriverError'; +import { SeaNativeConnection, SeaExecuteOptions } from './SeaNativeLoader'; +import { mapKernelErrorToJsError, KernelErrorShape } from './SeaErrorMapping'; +import SeaOperationBackend from './SeaOperationBackend'; + +const KERNEL_ERROR_SENTINEL = '__databricks_error__:'; + +function rethrowKernelError(err: unknown): never { + if (err && typeof err === 'object' && 'message' in err) { + const reason = (err as { reason?: unknown }).reason; + if (typeof reason === 'string' && reason.startsWith(KERNEL_ERROR_SENTINEL)) { + try { + const payload = JSON.parse(reason.slice(KERNEL_ERROR_SENTINEL.length)) as KernelErrorShape; + throw mapKernelErrorToJsError(payload); + } catch (parseErr) { + if (parseErr !== err) { + throw parseErr; + } + } + } + } + throw err; +} + +/** + * Per-session defaults that apply to every `executeStatement` issued + * through this backend. Captured at `SeaBackend.openSession()` time from + * the `OpenSessionRequest` — `initialCatalog` / `initialSchema` / + * `sessionConfig`. + * + * The napi binding routes these to the kernel's `statement_conf` map, + * which the SEA wire treats as session-scoped parameters. They are + * forwarded with every `executeStatement` call so the JDBC-style + * "session config" semantics are preserved even though SEA's wire + * protocol is statement-scoped. + */ +export interface SeaSessionDefaults { + initialCatalog?: string; + initialSchema?: string; + sessionConfig?: Record; +} + +export interface SeaSessionBackendOptions { + /** The opaque napi `Connection` handle returned by `openSession`. */ + connection: SeaNativeConnection; + context: IClientContext; + defaults?: SeaSessionDefaults; + /** Optional override for `id`. Defaults to a fresh UUIDv4. */ + id?: string; +} + +/** + * SEA-backed implementation of `ISessionBackend`. + * + * **M0 scope:** `executeStatement` + `close`. Metadata methods + * (`getCatalogs`, `getSchemas`, etc.) defer to M1 — they throw a clear + * `HiveDriverError` so consumers using SEA against metadata APIs get an + * actionable message instead of silently falling back. The Thrift + * backend continues to handle the metadata path by default (callers + * opt into SEA via `ConnectionOptions.useSEA`). + * + * **Session config flow:** the SEA wire protocol is statement-scoped, + * so "session config" semantics (Spark conf, `initialCatalog`, + * `initialSchema`) are emulated by forwarding the same defaults with + * every `executeStatement` call. Per-statement overrides on + * `ExecuteStatementOptions` are reserved for M1; M0 carries only the + * defaults captured at session-open time. + */ +export default class SeaSessionBackend implements ISessionBackend { + private readonly connection: SeaNativeConnection; + + // eslint-disable-next-line @typescript-eslint/no-unused-vars + private readonly context: IClientContext; + + private readonly defaults: SeaSessionDefaults; + + private readonly _id: string; + + private closed = false; + + constructor({ connection, context, defaults, id }: SeaSessionBackendOptions) { + this.connection = connection; + this.context = context; + this.defaults = defaults ?? {}; + this._id = id ?? uuidv4(); + } + + public get id(): string { + return this._id; + } + + public async getInfo(_infoType: number): Promise { + throw new HiveDriverError('SeaSessionBackend.getInfo: not implemented yet (deferred to M1)'); + } + + /** + * Execute a SQL statement through the napi binding. Merges the + * session-level defaults (`initialCatalog` / `initialSchema` / + * `sessionConfig`) with any per-call overrides — per-call overrides + * win when both are present. + * + * M0 intentionally ignores `queryTimeout`, `maxRows`, `useCloudFetch`, + * `useLZ4Compression`, `namedParameters`, `ordinalParameters`, + * `stagingAllowedLocalPath`, and `queryTags` — those defer to M1 per + * the execution plan. The Thrift backend remains the path for + * consumers that need any of those today. + */ + public async executeStatement(statement: string, options: ExecuteStatementOptions): Promise { + this.failIfClosed(); + + // M0 surfaces a clear error rather than silently dropping M1-only knobs. + // Tracking via the execution plan's M1 scope. + if (options.namedParameters !== undefined || options.ordinalParameters !== undefined) { + throw new HiveDriverError( + 'SEA executeStatement: query parameters are not supported in M0 (deferred to M1)', + ); + } + if (options.queryTimeout !== undefined) { + throw new HiveDriverError( + 'SEA executeStatement: queryTimeout is not supported in M0 (deferred to M1)', + ); + } + + const executeOptions: SeaExecuteOptions = { + initialCatalog: this.defaults.initialCatalog, + initialSchema: this.defaults.initialSchema, + sessionConfig: this.defaults.sessionConfig, + }; + + let nativeStatement; + try { + nativeStatement = await this.connection.executeStatement(statement, executeOptions); + } catch (err) { + rethrowKernelError(err); + } + return new SeaOperationBackend({ + statement: nativeStatement!, + context: this.context, + }); + } + + public async getTypeInfo(_request: TypeInfoRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getTypeInfo: not implemented yet (deferred to M1)'); + } + + public async getCatalogs(_request: CatalogsRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getCatalogs: not implemented yet (deferred to M1)'); + } + + public async getSchemas(_request: SchemasRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getSchemas: not implemented yet (deferred to M1)'); + } + + public async getTables(_request: TablesRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getTables: not implemented yet (deferred to M1)'); + } + + public async getTableTypes(_request: TableTypesRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getTableTypes: not implemented yet (deferred to M1)'); + } + + public async getColumns(_request: ColumnsRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getColumns: not implemented yet (deferred to M1)'); + } + + public async getFunctions(_request: FunctionsRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getFunctions: not implemented yet (deferred to M1)'); + } + + public async getPrimaryKeys(_request: PrimaryKeysRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getPrimaryKeys: not implemented yet (deferred to M1)'); + } + + public async getCrossReference(_request: CrossReferenceRequest): Promise { + throw new HiveDriverError('SeaSessionBackend.getCrossReference: not implemented yet (deferred to M1)'); + } + + public async close(): Promise { + if (this.closed) { + return Status.success(); + } + try { + await this.connection.close(); + } catch (err) { + rethrowKernelError(err); + } + this.closed = true; + return Status.success(); + } + + private failIfClosed(): void { + if (this.closed) { + throw new HiveDriverError('SeaSessionBackend: session is closed'); + } + } +} diff --git a/tests/integration/sea/execution-e2e.test.ts b/tests/integration/sea/execution-e2e.test.ts new file mode 100644 index 00000000..6092bdea --- /dev/null +++ b/tests/integration/sea/execution-e2e.test.ts @@ -0,0 +1,122 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { DBSQLClient } from '../../../lib'; + +/** + * sea-execution end-to-end test. + * + * Walks the full `DBSQLClient` → `SeaBackend` → napi binding → kernel + * pipeline against a live warehouse over PAT: + * + * 1. `connect({ useSEA: true })` selects the SEA backend. + * 2. `openSession({ initialCatalog: 'main' })` opens a kernel session + * and threads `initialCatalog` through to the napi `ExecuteOptions`. + * 3. `executeStatement('SELECT 1')` returns an `IOperation` backed by + * `SeaOperationBackend` (wraps a napi `Statement`). + * 4. `operation.id` is observable (via `IOperation.id` on the public + * surface). + * 5. `operation.cancel()` and `operation.close()` succeed without + * throwing. + * 6. `session.close()` and `client.close()` succeed without throwing. + * + * **Test gating:** requires the same env vars as `tests/native/e2e-smoke`. + * If any is missing, the suite is skipped so dev machines without + * provisioned secrets don't flap. + * + * **Proxy-validation note (per execution plan §17.4):** M0 verifies + * "no thrift fallback" indirectly — by selecting `useSEA: true` and + * exercising the executeStatement path. A proxy that captures + * `executeStatement` + `GetStatement` wire counts lands in the + * sea-integration round; for now we assert that the SEA pipeline + * itself runs cleanly to completion. + */ +describe('SEA execution end-to-end', function e2eSuite() { + const hostName = process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME; + const httpPath = process.env.DATABRICKS_PECOTESTING_HTTP_PATH; + const token = process.env.DATABRICKS_PECOTESTING_TOKEN_PERSONAL; + + // Live-warehouse round-trips can take a few seconds through warm-up. + this.timeout(60_000); + + before(function gate() { + if (!hostName || !httpPath || !token) { + // eslint-disable-next-line no-invalid-this + this.skip(); + } + }); + + it('opens a session, executes SELECT 1, and closes cleanly via SEA backend', async () => { + const client = new DBSQLClient(); + + await client.connect({ + host: hostName as string, + path: httpPath as string, + token: token as string, + useSEA: true, + }); + + const session = await client.openSession({ + initialCatalog: 'main', + }); + expect(session).to.be.an('object'); + expect(session.id).to.be.a('string').and.have.length.greaterThan(0); + + const operation = await session.executeStatement('SELECT 1', {}); + expect(operation).to.be.an('object'); + // `IOperation.id` is the public-API observable identity for the + // returned operation. SeaOperationBackend generates a UUIDv4 for + // M0 until the napi binding surfaces the server statement id. + expect(operation.id).to.be.a('string').and.have.length.greaterThan(0); + + // M0 does not yet plumb fetchChunk through the SEA pipeline + // (sea-results owns that). We exercise the lifecycle: cancel is a + // no-op against a finished statement, close releases the kernel + // handle. + await operation.close(); + + await session.close(); + await client.close(); + }); + + it('passes sessionConfig (Spark conf) through openSession.configuration', async () => { + const client = new DBSQLClient(); + + await client.connect({ + host: hostName as string, + path: httpPath as string, + token: token as string, + useSEA: true, + }); + + // Sanity-check that supplying session-level Spark conf does not + // break openSession. The SEA wire applies these as `parameters` on + // every executeStatement; we don't observe them in the response + // for M0, but the absence of an error proves the napi binding + // accepts and forwards the map. + const session = await client.openSession({ + initialCatalog: 'main', + configuration: { + 'spark.sql.session.timeZone': 'UTC', + }, + }); + + const operation = await session.executeStatement('SELECT 1', {}); + await operation.close(); + + await session.close(); + await client.close(); + }); +}); diff --git a/tests/unit/sea/execution.test.ts b/tests/unit/sea/execution.test.ts new file mode 100644 index 00000000..f4493472 --- /dev/null +++ b/tests/unit/sea/execution.test.ts @@ -0,0 +1,462 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import sinon from 'sinon'; +import SeaBackend from '../../../lib/sea/SeaBackend'; +import SeaSessionBackend from '../../../lib/sea/SeaSessionBackend'; +import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend'; +import { + SeaNativeBinding, + SeaNativeConnection, + SeaNativeStatement, + SeaExecuteOptions, +} from '../../../lib/sea/SeaNativeLoader'; +import IClientContext, { ClientConfig } from '../../../lib/contracts/IClientContext'; +import IDBSQLLogger, { LogLevel } from '../../../lib/contracts/IDBSQLLogger'; +import HiveDriverError from '../../../lib/errors/HiveDriverError'; +import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; + +// ----------------------------------------------------------------------------- +// Fakes — minimal stand-ins for the napi-rs generated surface and the +// IClientContext side of the abstraction. Keeping them inline avoids +// pulling in test-only fixtures from outside the sea/ namespace. +// ----------------------------------------------------------------------------- + +class FakeNativeStatement implements SeaNativeStatement { + public closed = false; + + public cancelled = false; + + public async fetchNextBatch() { + return null; + } + + public async schema() { + return { ipcBytes: Buffer.alloc(0) }; + } + + public async cancel() { + this.cancelled = true; + } + + public async close() { + this.closed = true; + } +} + +class FakeNativeConnection implements SeaNativeConnection { + public closed = false; + + public lastSql?: string; + + public lastOptions?: SeaExecuteOptions; + + public throwOnExecute: Error | null = null; + + public statementToReturn: FakeNativeStatement = new FakeNativeStatement(); + + public async executeStatement(sql: string, options: SeaExecuteOptions): Promise { + if (this.throwOnExecute) { + throw this.throwOnExecute; + } + this.lastSql = sql; + this.lastOptions = options; + return this.statementToReturn; + } + + public async close(): Promise { + this.closed = true; + } +} + +function makeBinding(connection: SeaNativeConnection): SeaNativeBinding & { + openSessionStub: sinon.SinonStub; +} { + const openSessionStub = sinon.stub().resolves(connection); + const binding: SeaNativeBinding = { + version: () => 'test', + openSession: openSessionStub, + Connection: function Connection() {}, + Statement: function Statement() {}, + }; + return Object.assign(binding, { openSessionStub }); +} + +function makeContext(): IClientContext { + const logger: IDBSQLLogger = { + log(_level: LogLevel, _message: string): void { + // no-op + }, + }; + const config = {} as ClientConfig; + return { + getConfig: () => config, + getLogger: () => logger, + getConnectionProvider: async () => { + throw new Error('not used by SEA backend'); + }, + getClient: async () => { + throw new Error('not used by SEA backend'); + }, + getDriver: async () => { + throw new Error('not used by SEA backend'); + }, + }; +} + +// ----------------------------------------------------------------------------- +// Tests +// ----------------------------------------------------------------------------- + +describe('SeaBackend', () => { + it('connect() captures the connection options and validates PAT auth', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + + await backend.connect({ + host: 'example.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: 'dapi-token', + } as ConnectionOptions); + + // openSession should not have been called by connect() + expect(binding.openSessionStub.called).to.equal(false); + }); + + it('connect() rejects non-PAT auth (M0 PAT-only)', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + + let thrown: unknown; + try { + await backend.connect({ + host: 'example.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + } as ConnectionOptions); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.match(/access-token/); + }); + + it('connect() rejects missing token', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + + let thrown: unknown; + try { + await backend.connect({ + host: 'example.databricks.com', + path: '/sql/1.0/warehouses/abc', + token: '', + } as ConnectionOptions); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.match(/token is required/); + }); + + it('openSession() throws if connect() was not called', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + + let thrown: unknown; + try { + await backend.openSession({}); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.match(/not connected/); + }); + + it('openSession() forwards hostName / httpPath / token to napi binding', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + + await backend.connect({ + host: 'workspace.example', + path: '/sql/1.0/warehouses/xyz', + token: 'dapi-token', + } as ConnectionOptions); + + await backend.openSession({}); + + expect(binding.openSessionStub.calledOnce).to.equal(true); + const args = binding.openSessionStub.firstCall.args[0]; + expect(args).to.deep.equal({ + hostName: 'workspace.example', + httpPath: '/sql/1.0/warehouses/xyz', + token: 'dapi-token', + }); + }); + + it('openSession() returns a SeaSessionBackend wrapping the napi Connection', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + + await backend.connect({ + host: 'h', + path: '/p', + token: 't', + } as ConnectionOptions); + + const sessionBackend = await backend.openSession({}); + expect(sessionBackend).to.be.instanceOf(SeaSessionBackend); + expect(sessionBackend.id).to.be.a('string').and.have.length.greaterThan(0); + }); + + it('openSession() propagates initialCatalog / initialSchema / sessionConfig through to executeStatement', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + + await backend.connect({ + host: 'h', + path: '/p', + token: 't', + } as ConnectionOptions); + + const session = await backend.openSession({ + initialCatalog: 'main', + initialSchema: 'default', + configuration: { 'spark.sql.execution.arrow.enabled': 'true' }, + }); + + await session.executeStatement('SELECT 1', {}); + + expect(connection.lastSql).to.equal('SELECT 1'); + expect(connection.lastOptions).to.deep.equal({ + initialCatalog: 'main', + initialSchema: 'default', + sessionConfig: { 'spark.sql.execution.arrow.enabled': 'true' }, + }); + }); + + it('close() clears connection state without throwing', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new SeaBackend({ context: makeContext(), nativeBinding: binding }); + await backend.connect({ host: 'h', path: '/p', token: 't' } as ConnectionOptions); + await backend.close(); + + let thrown: unknown; + try { + await backend.openSession({}); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + }); +}); + +describe('SeaSessionBackend', () => { + function makeSession(connection: SeaNativeConnection, defaults = {}) { + return new SeaSessionBackend({ connection, context: makeContext(), defaults }); + } + + it('executeStatement passes sql through verbatim', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + await session.executeStatement('SELECT * FROM foo', {}); + expect(connection.lastSql).to.equal('SELECT * FROM foo'); + }); + + it('executeStatement returns a SeaOperationBackend with an id', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + const op = await session.executeStatement('SELECT 1', {}); + expect(op).to.be.instanceOf(SeaOperationBackend); + expect(op.id).to.be.a('string').and.have.length.greaterThan(0); + }); + + it('executeStatement merges session defaults into ExecuteOptions', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection, { + initialCatalog: 'main', + initialSchema: 'default', + sessionConfig: { foo: 'bar' }, + }); + await session.executeStatement('SELECT 1', {}); + expect(connection.lastOptions).to.deep.equal({ + initialCatalog: 'main', + initialSchema: 'default', + sessionConfig: { foo: 'bar' }, + }); + }); + + it('executeStatement rejects namedParameters (M1)', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + let thrown: unknown; + try { + await session.executeStatement('SELECT :x', { namedParameters: { x: 1 } }); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.match(/parameters/); + }); + + it('executeStatement rejects ordinalParameters (M1)', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + let thrown: unknown; + try { + await session.executeStatement('SELECT ?', { ordinalParameters: [1] }); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + }); + + it('executeStatement rejects queryTimeout (M1)', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + let thrown: unknown; + try { + await session.executeStatement('SELECT 1', { queryTimeout: 30 }); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.match(/queryTimeout/); + }); + + it('metadata methods throw deferred-M1 errors', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + for (const method of [ + 'getInfo', + 'getTypeInfo', + 'getCatalogs', + 'getSchemas', + 'getTables', + 'getTableTypes', + 'getColumns', + 'getFunctions', + 'getPrimaryKeys', + 'getCrossReference', + ] as const) { + let thrown: unknown; + try { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await (session as any)[method]({}); + } catch (err) { + thrown = err; + } + expect(thrown, `expected ${method} to throw`).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.match(/M1|not implemented/); + } + }); + + it('close() forwards to the native connection', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + const status = await session.close(); + expect(connection.closed).to.equal(true); + expect(status.isSuccess).to.equal(true); + }); + + it('close() is idempotent', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + await session.close(); + // Second call should not re-invoke connection.close + connection.closed = false; + const status = await session.close(); + expect(connection.closed).to.equal(false); + expect(status.isSuccess).to.equal(true); + }); + + it('executeStatement fails after close()', async () => { + const connection = new FakeNativeConnection(); + const session = makeSession(connection); + await session.close(); + let thrown: unknown; + try { + await session.executeStatement('SELECT 1', {}); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + }); +}); + +describe('SeaOperationBackend', () => { + function makeOperation(statement: SeaNativeStatement = new FakeNativeStatement()) { + return new SeaOperationBackend({ statement, context: makeContext() }); + } + + it('id is a stable string', () => { + const op = makeOperation(); + expect(op.id).to.equal(op.id); + expect(op.id).to.be.a('string').and.have.length.greaterThan(0); + }); + + it('hasResultSet is true for M0', () => { + const op = makeOperation(); + expect(op.hasResultSet).to.equal(true); + }); + + it('cancel() forwards to napi Statement', async () => { + const stmt = new FakeNativeStatement(); + const op = makeOperation(stmt); + await op.cancel(); + expect(stmt.cancelled).to.equal(true); + }); + + it('cancel() is idempotent', async () => { + const stmt = new FakeNativeStatement(); + const op = makeOperation(stmt); + await op.cancel(); + stmt.cancelled = false; + await op.cancel(); + expect(stmt.cancelled).to.equal(false); + }); + + it('close() forwards to napi Statement', async () => { + const stmt = new FakeNativeStatement(); + const op = makeOperation(stmt); + await op.close(); + expect(stmt.closed).to.equal(true); + }); + + it('waitUntilReady() is a no-op (kernel internalises polling)', async () => { + const op = makeOperation(); + await op.waitUntilReady(); + }); + + it('fetchChunk() throws M1-deferred error (owned by sea-results)', async () => { + const op = makeOperation(); + let thrown: unknown; + try { + await op.fetchChunk({ limit: 100 }); + } catch (err) { + thrown = err; + } + expect(thrown).to.be.instanceOf(HiveDriverError); + expect((thrown as Error).message).to.match(/sea-results/); + }); +}); From 5559c7ced5c818abef2ef3ed3cfab9998fd94dd2 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 02:05:53 +0000 Subject: [PATCH 09/11] =?UTF-8?q?integration:=20post-merge=20fix=20?= =?UTF-8?q?=E2=80=94=20update=20execution.test.ts=20assertions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two assertions in tests/unit/sea/execution.test.ts were specific to the pre-merge SeaBackend / SeaOperationBackend stubs: 1. connect() missing-token rejection now flows through SeaAuth.buildSeaConnectionOptions which throws AuthenticationError (still a HiveDriverError subclass) with message "non-empty PAT". Updated the regex match accordingly. 2. fetchChunk() is no longer a stub — the merged SeaOperationBackend uses the sea-results pipeline (SeaResultsProvider + ArrowResultConverter + ResultSlicer). The "throws M1-deferred error owned by sea-results" test is now incorrect by design; removed it with a pointer comment to the real coverage in SeaOperationBackend.test.ts and results-e2e.test.ts. 891/891 unit tests passing post-merge. --- tests/unit/sea/execution.test.ts | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/unit/sea/execution.test.ts b/tests/unit/sea/execution.test.ts index f4493472..88d7da23 100644 --- a/tests/unit/sea/execution.test.ts +++ b/tests/unit/sea/execution.test.ts @@ -171,7 +171,10 @@ describe('SeaBackend', () => { thrown = err; } expect(thrown).to.be.instanceOf(HiveDriverError); - expect((thrown as Error).message).to.match(/token is required/); + // After sea-integration merge, missing-token validation goes through + // SeaAuth.buildSeaConnectionOptions which throws AuthenticationError + // (extends HiveDriverError) with the "non-empty PAT" message. + expect((thrown as Error).message).to.match(/non-empty PAT/); }); it('openSession() throws if connect() was not called', async () => { @@ -448,15 +451,9 @@ describe('SeaOperationBackend', () => { await op.waitUntilReady(); }); - it('fetchChunk() throws M1-deferred error (owned by sea-results)', async () => { - const op = makeOperation(); - let thrown: unknown; - try { - await op.fetchChunk({ limit: 100 }); - } catch (err) { - thrown = err; - } - expect(thrown).to.be.instanceOf(HiveDriverError); - expect((thrown as Error).message).to.match(/sea-results/); - }); + // Note: after sea-integration merge, fetchChunk is no longer a stub — + // the sea-results SeaResultsProvider + ArrowResultConverter pipeline + // implements the real fetch path. Full coverage lives in + // tests/unit/sea/SeaOperationBackend.test.ts and the parity-gate e2e + // at tests/integration/sea/results-e2e.test.ts. }); From 4d8026715a1164de771e88e18a609452bd13cb34 Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 02:44:50 +0000 Subject: [PATCH 10/11] sea-integration: INTERVAL YEAR-MONTH + DAY-TIME parity with thrift - YEAR-MONTH: convert Arrow Interval[YearMonth] to thrift "N-M" string format (with leading "-" for negatives) in Phase 1 of converter - DAY-TIME: pre-process IPC schema bytes before apache-arrow@13 decode (which predates the Arrow Duration type id 18) to remap Duration -> Int64 with original time unit preserved in `databricks.arrow.duration_unit` field metadata; convert Int64 duration values to thrift "D HH:mm:ss.fffffffff" string format Both interval flavours are formatted by the same converter helper (formatDayTimeFromTotal); the duration_unit metadata gates between the native Arrow Interval Int32Array path and the rewritten Duration Int64 path. No apache-arrow bump, no node_modules edits, no kernel-side change. New: lib/sea/SeaArrowIpcDurationFix.ts (FlatBuffer rewriter using apache-arrow's internal fb/* accessors). M0 datatype parity now 25/25. --- lib/result/ArrowResultConverter.ts | 210 ++++++- lib/sea/SeaArrowIpc.ts | 41 +- lib/sea/SeaArrowIpcDurationFix.ts | 663 +++++++++++++++++++++++ lib/sea/SeaResultsProvider.ts | 10 +- tests/unit/sea/SeaIntervalParity.test.ts | 365 +++++++++++++ 5 files changed, 1274 insertions(+), 15 deletions(-) create mode 100644 lib/sea/SeaArrowIpcDurationFix.ts create mode 100644 tests/unit/sea/SeaIntervalParity.test.ts diff --git a/lib/result/ArrowResultConverter.ts b/lib/result/ArrowResultConverter.ts index 57fa02af..7a3c190c 100644 --- a/lib/result/ArrowResultConverter.ts +++ b/lib/result/ArrowResultConverter.ts @@ -23,6 +23,143 @@ const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils; type ArrowSchema = Schema; type ArrowSchemaField = Field>; +/** + * Metadata key carrying the original Arrow `Duration` time unit on + * fields that were rewritten to `Int64` by the SEA IPC pre-processor + * (`lib/sea/SeaArrowIpcDurationFix.ts`). We re-declare the constant + * here (rather than importing it) so the converter has no compile-time + * dependency on the SEA module — it's reused unchanged by the + * thrift-path which has no SEA awareness. + */ +const DURATION_UNIT_METADATA_KEY = 'databricks.arrow.duration_unit'; + +/** + * Format an Arrow `Interval[YearMonth]` or `Interval[DayTime]` value + * into the canonical thrift string the JDBC/ODBC server emits: + * YEAR-MONTH → `"Y-M"` (e.g. 1 year 2 months → `"1-2"`) + * DAY-TIME → `"D HH:mm:ss.fffffffff"` + * (e.g. 1 day 02:03:04 → `"1 02:03:04.000000000"`) + * + * Arrow surfaces these as `Int32Array(2)` via the `GetVisitor` + * (`apache-arrow/visitor/get.js:177-185`): + * YEAR-MONTH: `[years, months]` (years/months derived from a single + * int32 holding total months) + * DAY-TIME: `[days, milliseconds]` (legacy two-int32 form) + * + * Negative intervals: the FULL interval is emitted with a leading `-` + * (Spark convention), and individual fields are unsigned. We mirror + * Spark's display. + */ +function formatArrowInterval(value: any, valueType: any): string { + // `value` is an Int32Array of length 2. + const a = Number(value[0]); + const b = Number(value[1]); + // unit 0 = YEAR_MONTH, unit 1 = DAY_TIME, unit 2 = MONTH_DAY_NANO + const unit = valueType?.unit; + if (unit === 0) { + return formatYearMonth(a, b); + } + // DAY_TIME: a = days, b = milliseconds (within the day, can be ≥0 or <0) + // We re-normalise: total milliseconds = a * 86_400_000 + b, then split into + // days, hours, minutes, seconds, nanoseconds (nanoseconds is always 0 + // because the legacy IntervalDayTime carries only millisecond precision). + const totalMs = BigInt(a) * BigInt(86_400_000) + BigInt(b); + return formatDayTimeFromTotal(totalMs * BigInt(1_000_000) /* → ns */, 'NANOSECOND'); +} + +/** + * Format the (years, months) decomposition into `"Y-M"` (or `"-Y-M"` + * for negative intervals). Arrow's `getIntervalYearMonth` (in + * `apache-arrow/visitor/get.js:179`) decomposes a signed total-months + * int32 via integer truncation, so years and months always share the + * same sign. We render the absolute values with a single leading `-` + * to match the Spark display format used on the thrift path. + */ +function formatYearMonth(years: number, months: number): string { + const total = years * 12 + months; + if (total < 0) { + const abs = -total; + const y = Math.trunc(abs / 12); + const m = abs % 12; + return `-${y}-${m}`; + } + return `${years}-${months}`; +} + +/** + * Format an Arrow `Duration` value (rewritten by the SEA IPC + * pre-processor to `Int64`) into the thrift INTERVAL DAY-TIME string. + * + * @param value the duration value as `bigint` (signed nanos/micros/ + * millis/seconds depending on `unit`) + * @param unit one of `SECOND` / `MILLISECOND` / `MICROSECOND` / + * `NANOSECOND` (the original Arrow time unit, captured + * by `SeaArrowIpcDurationFix.ts`) + */ +function formatDurationToIntervalDayTime(value: bigint | number, unit: string): string { + const bi = typeof value === 'bigint' ? value : BigInt(value); + const nanos = toNanoseconds(bi, unit); + return formatDayTimeFromTotal(nanos, unit); +} + +/** + * Scale a duration value to nanoseconds based on its unit. + * + * SECOND → ×1_000_000_000 + * MILLISECOND → × 1_000_000 + * MICROSECOND → × 1_000 + * NANOSECOND → × 1 + */ +function toNanoseconds(value: bigint, unit: string): bigint { + switch (unit) { + case 'SECOND': + return value * BigInt(1_000_000_000); + case 'MILLISECOND': + return value * BigInt(1_000_000); + case 'MICROSECOND': + return value * BigInt(1_000); + case 'NANOSECOND': + default: + return value; + } +} + +/** + * Format a signed total-nanoseconds value as `"D HH:mm:ss.fffffffff"`. + * Always emits 9 fractional digits to match the thrift driver's wire + * format (`"1 02:03:04.000000000"` — 9 digits regardless of the + * server-side storage precision). Negative values get a single + * leading `-`. + * + * The `unit` parameter is currently unused for formatting (the value + * is already in nanoseconds by the time we get here) but is retained + * for future use if a unit-aware precision is ever needed. + */ +function formatDayTimeFromTotal(totalNanos: bigint, _unit: string): string { + const ZERO = BigInt(0); + const sign = totalNanos < ZERO ? '-' : ''; + const abs = totalNanos < ZERO ? -totalNanos : totalNanos; + + const NS_PER_SEC = BigInt(1_000_000_000); + const NS_PER_MIN = NS_PER_SEC * BigInt(60); + const NS_PER_HOUR = NS_PER_MIN * BigInt(60); + const NS_PER_DAY = NS_PER_HOUR * BigInt(24); + + const days = abs / NS_PER_DAY; + let rem = abs % NS_PER_DAY; + const hours = rem / NS_PER_HOUR; + rem %= NS_PER_HOUR; + const minutes = rem / NS_PER_MIN; + rem %= NS_PER_MIN; + const seconds = rem / NS_PER_SEC; + const subSeconds = rem % NS_PER_SEC; + + const pad2 = (n: bigint): string => n.toString().padStart(2, '0'); + const fraction = `.${subSeconds.toString().padStart(9, '0')}`; + + return `${sign}${days.toString()} ${pad2(hours)}:${pad2(minutes)}:${pad2(seconds)}${fraction}`; +} + export default class ArrowResultConverter implements IResultsProvider> { private readonly context: IClientContext; @@ -142,37 +279,52 @@ export default class ArrowResultConverter implements IResultsProvider private getRows(schema: ArrowSchema, rows: Array): Array { return rows.map((row) => { // First, convert native Arrow values to corresponding plain JS objects - const record = this.convertArrowTypes(row, undefined, schema.fields); + const record = this.convertArrowTypes(row, undefined, schema.fields, undefined); // Second, cast all the values to original Thrift types return this.convertThriftTypes(record); }); } - private convertArrowTypes(value: any, valueType: DataType | undefined, fields: Array = []): any { + private convertArrowTypes( + value: any, + valueType: DataType | undefined, + fields: Array = [], + field?: ArrowSchemaField, + ): any { if (value === null) { return value; } const fieldsMap: Record = {}; - for (const field of fields) { - fieldsMap[field.name] = field; + for (const f of fields) { + fieldsMap[f.name] = f; } // Convert structures to plain JS object and process all its fields recursively if (value instanceof StructRow) { const result = value.toJSON(); for (const key of Object.keys(result)) { - const field: ArrowSchemaField | undefined = fieldsMap[key]; - result[key] = this.convertArrowTypes(result[key], field?.type, field?.type.children || []); + const childField: ArrowSchemaField | undefined = fieldsMap[key]; + result[key] = this.convertArrowTypes( + result[key], + childField?.type, + childField?.type.children || [], + childField, + ); } return result; } if (value instanceof MapRow) { const result = value.toJSON(); // Map type consists of its key and value types. We need only value type here, key will be cast to string anyway - const field = fieldsMap.entries?.type.children.find((item) => item.name === 'value'); + const valueField = fieldsMap.entries?.type.children.find((item) => item.name === 'value'); for (const key of Object.keys(result)) { - result[key] = this.convertArrowTypes(result[key], field?.type, field?.type.children || []); + result[key] = this.convertArrowTypes( + result[key], + valueField?.type, + valueField?.type.children || [], + valueField, + ); } return result; } @@ -181,14 +333,28 @@ export default class ArrowResultConverter implements IResultsProvider if (value instanceof Vector) { const result = value.toJSON(); // Array type contains the only child which defines a type of each array's element - const field = fieldsMap.element; - return result.map((item) => this.convertArrowTypes(item, field?.type, field?.type.children || [])); + const elementField = fieldsMap.element; + return result.map((item) => + this.convertArrowTypes(item, elementField?.type, elementField?.type.children || [], elementField), + ); } if (DataType.isTimestamp(valueType)) { return new Date(value); } + // INTERVAL — Spark/Databricks SEA emits two flavours: native Arrow + // `Interval[YearMonth]` / `Interval[DayTime]` (handled here) and + // `Duration` (transparently rewritten to `Int64` upstream by + // `SeaArrowIpcDurationFix.ts`; handled in the bigint/Int64 branch + // below). In every case we coerce to the canonical thrift string + // form so the SEA path is byte-identical with the thrift path: + // YEAR-MONTH → `"Y-M"` + // DAY-TIME → `"D HH:mm:ss.fffffffff"` + if (DataType.isInterval(valueType)) { + return formatArrowInterval(value, valueType); + } + // Convert big number values to BigInt // Decimals are also represented as big numbers in Arrow, so additionally process them (convert to float) if (value instanceof Object && value[isArrowBigNumSymbol]) { @@ -196,16 +362,38 @@ export default class ArrowResultConverter implements IResultsProvider if (DataType.isDecimal(valueType)) { return Number(result) / 10 ** valueType.scale; } + // Duration columns rewritten to Int64 — detect via metadata. + const durationUnit = field?.metadata.get(DURATION_UNIT_METADATA_KEY); + if (durationUnit) { + return formatDurationToIntervalDayTime(result, durationUnit); + } return result; } // Convert binary data to Buffer if (value instanceof Uint8Array) { + // INTERVAL DAY-TIME / YEAR-MONTH that apache-arrow surfaced as + // an Int32Array (size 2). `Uint8Array.isInstanceOf` is true for + // every TypedArray subclass, so we have to check the parent type + // first. The `DataType.isInterval` branch above already handles + // the case where Arrow knew the field was an interval — this + // fallback covers schemas where the interval surfaced as bare + // bytes (defensive; not exercised in M0). return Buffer.from(value); } + // Bigint fallback — for raw bigints (not BigNum wrappers), the + // duration_unit metadata also gates the INTERVAL DAY-TIME format. + if (typeof value === 'bigint') { + const durationUnit = field?.metadata.get(DURATION_UNIT_METADATA_KEY); + if (durationUnit) { + return formatDurationToIntervalDayTime(value, durationUnit); + } + return Number(value); + } + // Return other values as is - return typeof value === 'bigint' ? Number(value) : value; + return value; } private convertThriftTypes(record: Record): any { diff --git a/lib/sea/SeaArrowIpc.ts b/lib/sea/SeaArrowIpc.ts index 57e26dac..59418ab5 100644 --- a/lib/sea/SeaArrowIpc.ts +++ b/lib/sea/SeaArrowIpc.ts @@ -14,6 +14,7 @@ import { RecordBatchReader, Schema, Field, DataType, TypeMap } from 'apache-arrow'; import { TTableSchema, TTypeId, TPrimitiveTypeEntry } from '../../thrift/TCLIService_types'; +import { rewriteDurationToInt64, DURATION_UNIT_METADATA_KEY } from './SeaArrowIpcDurationFix'; /** * Field metadata key used by the kernel to attach the original Databricks @@ -44,7 +45,8 @@ const DATABRICKS_TYPE_NAME = 'databricks.type_name'; * double-parse cost is negligible for M0. */ export function decodeIpcBatch(ipcBytes: Buffer): { schema: Schema; rowCount: number } { - const reader = RecordBatchReader.from(ipcBytes); + const patched = rewriteDurationToInt64(ipcBytes); + const reader = RecordBatchReader.from(patched); // Eagerly open so `schema` is populated. reader.open(); const { schema } = reader; @@ -62,11 +64,30 @@ export function decodeIpcBatch(ipcBytes: Buffer): { schema: Schema; row * apache-arrow Schema object. */ export function decodeIpcSchema(ipcBytes: Buffer): Schema { - const reader = RecordBatchReader.from(ipcBytes); + const patched = rewriteDurationToInt64(ipcBytes); + const reader = RecordBatchReader.from(patched); reader.open(); return reader.schema; } +/** + * Pre-process raw IPC bytes from the kernel so they're consumable by + * `apache-arrow@13`. The current transformation is `Duration → Int64` + * with the original duration unit preserved in field metadata (see + * `SeaArrowIpcDurationFix.ts`). Returned bytes are byte-identical to + * the input when no transformation is needed. + * + * Exposed so callers can pre-patch the buffer **once** and pass the + * result through both `decodeIpcBatch` (for row-count extraction in + * `SeaResultsProvider`) and `ArrowResultConverter.fetchNext` (which + * re-decodes the same bytes via `RecordBatchReader.from`). Without + * this, the converter would re-throw on `Duration` because it never + * sees the patched bytes. + */ +export function patchIpcBytes(ipcBytes: Buffer): Buffer { + return rewriteDurationToInt64(ipcBytes); +} + /** * Map an Arrow `DataType` (with optional `databricks.type_name` * metadata) onto the closest Thrift `TTypeId`. @@ -160,6 +181,13 @@ function arrowTypeToTTypeId(field: Field): TTypeId { const arrowType = field.type; if (DataType.isBool(arrowType)) return TTypeId.BOOLEAN_TYPE; if (DataType.isInt(arrowType)) { + // Duration columns are rewritten to Int64 with a + // `databricks.arrow.duration_unit` metadata marker (see + // `SeaArrowIpcDurationFix.ts`). Surface them as INTERVAL_DAY_TIME + // so the converter formats them back into the thrift string form. + if (arrowType.bitWidth === 64 && field.metadata.has(DURATION_UNIT_METADATA_KEY)) { + return TTypeId.INTERVAL_DAY_TIME_TYPE; + } switch (arrowType.bitWidth) { case 8: return TTypeId.TINYINT_TYPE; @@ -182,6 +210,15 @@ function arrowTypeToTTypeId(field: Field): TTypeId { if (DataType.isBinary(arrowType)) return TTypeId.BINARY_TYPE; if (DataType.isDate(arrowType)) return TTypeId.DATE_TYPE; if (DataType.isTimestamp(arrowType)) return TTypeId.TIMESTAMP_TYPE; + // Native Arrow Interval types. The server-side INTERVAL YEAR-MONTH + // (and the legacy IntervalDayTime variant) come through with type + // id 11 / -25 / -26 — apache-arrow@13 surfaces them as `Int32Array` + // pairs which the converter formats to thrift's `"Y-M"` / day-time + // strings. + if (DataType.isInterval(arrowType)) { + // unit 0 = YEAR_MONTH, unit 1 = DAY_TIME, unit 2 = MONTH_DAY_NANO + return arrowType.unit === 0 ? TTypeId.INTERVAL_YEAR_MONTH_TYPE : TTypeId.INTERVAL_DAY_TIME_TYPE; + } if (DataType.isList(arrowType)) return TTypeId.ARRAY_TYPE; if (DataType.isMap(arrowType)) return TTypeId.MAP_TYPE; if (DataType.isStruct(arrowType)) return TTypeId.STRUCT_TYPE; diff --git a/lib/sea/SeaArrowIpcDurationFix.ts b/lib/sea/SeaArrowIpcDurationFix.ts new file mode 100644 index 00000000..02275211 --- /dev/null +++ b/lib/sea/SeaArrowIpcDurationFix.ts @@ -0,0 +1,663 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * Pre-process an Arrow IPC stream payload to make it consumable by + * `apache-arrow@13`, which predates the addition of the `Duration` type + * (FlatBuffer `Type` enum id 18) in version 14. + * + * The Databricks SQL server emits INTERVAL DAY-TIME columns as Arrow + * `Duration(MICROSECOND)` in the SEA IPC stream. apache-arrow@13's + * `decodeFieldType` (`node_modules/apache-arrow/ipc/metadata/message.js:339-397`) + * throws `Unrecognized type: "Duration" (18)` on the schema FlatBuffer + * before any record batch is read, breaking the entire SEA path for any + * result that contains an INTERVAL DAY-TIME column. + * + * Because the physical layout of an Arrow `Duration` column is + * **identical** to an Arrow `Int64` column (8 bytes of signed integer per + * row in the values buffer, plus the validity bitmap), we can losslessly + * rewrite the schema FlatBuffer to advertise `Int(bitWidth=64, + * signed=true)` in place of `Duration(unit)`. The record-batch body + * bytes pass through unchanged. We embed the original `Duration` time + * unit (`SECOND`/`MILLISECOND`/`MICROSECOND`/`NANOSECOND`) into the + * rewritten field's `custom_metadata` under the key + * `databricks.arrow.duration_unit` so the JS converter can format the + * Int64 value back into a thrift-equivalent string (e.g. + * `"1 02:03:04.000000000"`). + * + * Why this lives in its own file: the rewriter is the only place in the + * codebase that needs to construct FlatBuffers by hand using the + * `flatbuffers` library; isolating it keeps `SeaArrowIpc.ts` focused on + * the high-level Arrow-decoded views. + * + * @see lib/result/ArrowResultConverter.ts — Phase-1 INTERVAL formatting + * reads the metadata key written here. + * @see findings/parity-mismatch/round5-implementation-2026-05-15.md — + * original failure mode (`Unrecognized type: "Duration" (18)`). + */ + +import * as flatbuffers from 'flatbuffers'; +// We reach into apache-arrow's internal FlatBuffer accessor modules +// rather than the high-level Schema/Field classes because the latter +// throw on the `Duration` type id 18 (`apache-arrow@13` predates the +// `Duration` enum entry). The internal `fb/*` modules are generated +// FlatBuffer code and recognize every type id present in the +// FlatBuffer schema, including `Duration`, so we can decode the +// original schema and rebuild it with `Duration` rewritten to `Int64`. +// eslint-disable-next-line import/no-internal-modules +import { Message } from 'apache-arrow/fb/message'; +// eslint-disable-next-line import/no-internal-modules +import { MessageHeader } from 'apache-arrow/fb/message-header'; +// eslint-disable-next-line import/no-internal-modules +import { Schema as FbSchema } from 'apache-arrow/fb/schema'; +// eslint-disable-next-line import/no-internal-modules +import { Field as FbField } from 'apache-arrow/fb/field'; +// eslint-disable-next-line import/no-internal-modules +import { KeyValue as FbKeyValue } from 'apache-arrow/fb/key-value'; +// eslint-disable-next-line import/no-internal-modules +import { Type as FbType } from 'apache-arrow/fb/type'; +// eslint-disable-next-line import/no-internal-modules +import { Duration as FbDuration } from 'apache-arrow/fb/duration'; +// eslint-disable-next-line import/no-internal-modules +import { Int as FbInt } from 'apache-arrow/fb/int'; +// eslint-disable-next-line import/no-internal-modules +import { TimeUnit as FbTimeUnit } from 'apache-arrow/fb/time-unit'; + +/** + * Metadata key written onto rewritten fields to preserve the original + * `Duration` time unit. Consumed by + * `lib/result/ArrowResultConverter.ts` Phase 1 to choose the correct + * scale when formatting INTERVAL DAY-TIME values. + */ +export const DURATION_UNIT_METADATA_KEY = 'databricks.arrow.duration_unit'; + +const IPC_CONTINUATION_MARKER = 0xffffffff; + +const TIME_UNIT_NAME: Record = { + [FbTimeUnit.SECOND]: 'SECOND', + [FbTimeUnit.MILLISECOND]: 'MILLISECOND', + [FbTimeUnit.MICROSECOND]: 'MICROSECOND', + [FbTimeUnit.NANOSECOND]: 'NANOSECOND', +}; + +/** + * Walk an IPC stream payload and rewrite any `Duration` field in the + * schema message to `Int64` (preserving the original time unit in + * custom metadata). Subsequent record-batch messages are forwarded + * verbatim — the data layout matches the rewritten `Int64` type + * bit-for-bit. + * + * If the schema contains no `Duration` fields, the input buffer is + * returned unchanged (zero-copy fast path). + * + * The caller is expected to pass a complete IPC stream payload (the + * full byte buffer the kernel returned for one `fetchNextBatch` call, + * or the schema-only payload from `statement.schema()`). Multi-segment + * stream payloads are supported; we walk through each message until + * the buffer is exhausted. + * + * @param ipcBytes raw IPC stream bytes from the napi binding + * @returns either the original buffer (no rewrite needed) or a fresh + * buffer with the schema message replaced + */ +export function rewriteDurationToInt64(ipcBytes: Buffer | Uint8Array): Buffer { + const view = ipcBytes instanceof Buffer ? ipcBytes : Buffer.from(ipcBytes); + + // First message must be the schema. If we can't find a schema message + // we leave the bytes alone — better to surface apache-arrow's normal + // error path than to mask a malformed stream. + const first = readMessageAt(view, 0); + if (!first) { + return view; + } + + if (first.message.headerType() !== MessageHeader.Schema) { + return view; + } + + const rewrittenSchema = maybeRewriteSchemaMessage(first.messageBytes); + if (!rewrittenSchema) { + // No Duration fields; nothing to do. + return view; + } + + // Splice the rewritten schema back into the stream: continuation + // marker + new metadata length + new metadata bytes + everything after + // the original schema message (body of schema is empty per Arrow spec; + // record batches follow). + const outputs: Buffer[] = []; + outputs.push(encodeContinuationAndLength(rewrittenSchema.byteLength)); + outputs.push(rewrittenSchema); + // Schema messages have no body (bodyLength=0 always — Arrow spec). + // Forward everything after the schema's metadata bytes unchanged. + const tailStart = first.totalEnd; + if (tailStart < view.byteLength) { + outputs.push(view.subarray(tailStart)); + } + + return Buffer.concat(outputs); +} + +/** + * Read one IPC message at the given offset. Returns the parsed Message + * object and byte ranges, or `null` if the buffer is exhausted. + * + * IPC stream message format (post-0.15): + * [continuation: 0xFFFFFFFF (4 bytes LE)] [length: int32 LE] + * [metadata: flatbuffer Message of `length` bytes] [body: bodyLength bytes] + * + * Pre-0.15 streams omit the continuation marker — the first 4 bytes are + * the metadata length directly. apache-arrow handles both + * (`message.js:44-50`); we mirror that here. + */ +function readMessageAt( + view: Buffer, + start: number, +): { + message: Message; + messageBytes: Buffer; + metadataStart: number; + metadataEnd: number; + bodyEnd: number; + totalEnd: number; +} | null { + if (start + 4 > view.byteLength) { + return null; + } + let cursor = start; + let metadataLength = view.readInt32LE(cursor); + cursor += 4; + + // Continuation marker (0xFFFFFFFF reads as -1 as int32) — followed by + // the actual length. + if (metadataLength === -1) { + if (cursor + 4 > view.byteLength) { + return null; + } + metadataLength = view.readInt32LE(cursor); + cursor += 4; + } + + if (metadataLength === 0) { + return null; + } + + const metadataStart = cursor; + const metadataEnd = cursor + metadataLength; + if (metadataEnd > view.byteLength) { + return null; + } + + const metadataBytes = view.subarray(metadataStart, metadataEnd); + const bb = new flatbuffers.ByteBuffer(metadataBytes); + const message = Message.getRootAsMessage(bb); + + const bodyLength = Number(message.bodyLength()); + const bodyStart = metadataEnd; + const bodyEnd = bodyStart + bodyLength; + if (bodyEnd > view.byteLength) { + // Malformed; let apache-arrow surface the error downstream. + return null; + } + + return { + message, + messageBytes: metadataBytes, + metadataStart, + metadataEnd, + bodyEnd, + totalEnd: bodyEnd, + }; +} + +/** + * If the schema message contains any `Duration` fields, returns a fresh + * FlatBuffer-encoded Message containing the rewritten schema. Otherwise + * returns `null` so the caller can short-circuit. + */ +function maybeRewriteSchemaMessage(schemaMessageBytes: Buffer): Buffer | null { + const bb = new flatbuffers.ByteBuffer(schemaMessageBytes); + const message = Message.getRootAsMessage(bb); + const fbSchema = message.header(new FbSchema()) as FbSchema | null; + if (!fbSchema) { + return null; + } + + // Scan top-level fields and children for Duration. We rewrite only + // top-level Duration fields for M0 (Spark INTERVAL DAY-TIME surfaces + // as a top-level column — children of Struct/List/Map are out of + // scope until we see a real-world payload with nested Duration). + let hasDuration = false; + const fieldsLength = fbSchema.fieldsLength(); + for (let i = 0; i < fieldsLength; i += 1) { + const f = fbSchema.fields(i); + if (f && f.typeType() === FbType.Duration) { + hasDuration = true; + break; + } + } + if (!hasDuration) { + return null; + } + + // Snapshot the (name, originalTypeType, durationUnit, originalCustomMetadata) + // for every field, then rebuild the schema using the flatbuffer builder. + type FieldSnapshot = { + name: string; + nullable: boolean; + isDuration: boolean; + durationUnit?: number; // FbTimeUnit + /** Preserved metadata key→value pairs (we add ours on top for Duration). */ + metadata: Array<[string, string]>; + /** Raw bytes for the original field if no rewrite needed; we'll re-encode it. */ + typeType: number; + /** Pre-decoded type sub-table bytes for non-Duration fields. */ + // For M0 we only rewrite Duration; other fields we re-create with the + // same primitive type. To keep the rewriter narrow, we only support + // schemas where non-Duration fields use type sub-tables that can be + // round-tripped via Field.decode → re-encode through flatbuffers' + // SizedByteArray serialization. That's complex, so instead we use + // a different approach: copy the raw FlatBuffer field offset + // directly when no rewrite is needed (handled by the + // copy-field-by-reference path below). + }; + // We can't simply "copy field by reference" across FlatBuffer + // builders, so we have to re-encode every field. For non-Duration + // fields, we re-encode using the apache-arrow `fb/*` accessors. + // That requires touching every existing supported type. + // + // To keep this rewriter narrow and DRY, we take a different + // approach: in-place patch. We do NOT rebuild the FlatBuffer. + // Instead, we mutate the field's `type_type` byte from Duration(18) + // to Int(2), and we point its `type` offset at a freshly-appended + // Int sub-table that we splice into the message bytes. Then we + // append a fresh `KeyValue` for `databricks.arrow.duration_unit` + // into the field's `custom_metadata` vector. This avoids re-encoding + // every other field. + // + // FlatBuffer in-place mutation is tricky because tables have vtables + // and offsets are 32-bit relative pointers. The fields we need to + // change are: + // 1. Field.type_type (1-byte enum at vtable slot for field #2): + // mutate the byte from 18 → 2. Same width, safe to overwrite. + // 2. Field.type (4-byte relative offset to the type sub-table): + // change the offset to point at our appended Int sub-table. + // Same width, safe to overwrite. + // 3. Field.custom_metadata (4-byte relative offset to vector): + // either rewrite the existing vector to add our entry, or + // append a new vector and update the offset. + // + // Because relative offsets are forward-only in FlatBuffers (offset is + // distance from the storage location to the target), and our + // appended sub-tables live AFTER the storage location, the math + // works out. We append to a growing byte buffer and patch the + // existing offset fields to point at the new tail. + + // Bail back to the full rebuild approach; in-place patching of + // arbitrary vtable layouts is fragile (vtables may share storage + // across fields). Re-encode the whole schema. + return rebuildSchemaWithDurationRewritten(message, fbSchema); +} + +/** + * Full re-encode path: parse every field in the schema, substitute + * `Duration` with `Int64` (carrying the unit in custom metadata), and + * emit a fresh Message FlatBuffer. This handles arbitrary schemas + * correctly at the cost of decode+re-encode of all fields. + * + * For non-Duration fields we copy the *bytes* of the original + * `type` sub-table verbatim into the new builder — FlatBuffer + * sub-tables are self-contained address spaces, so this is safe. + */ +function rebuildSchemaWithDurationRewritten(message: Message, fbSchema: FbSchema): Buffer { + const builder = new flatbuffers.Builder(1024); + + // Re-encode each field. + const fieldOffsets: number[] = []; + const fieldsLength = fbSchema.fieldsLength(); + for (let i = 0; i < fieldsLength; i += 1) { + const field = fbSchema.fields(i); + if (!field) { + continue; + } + fieldOffsets.push(reEncodeField(builder, field)); + } + + // Re-encode top-level schema custom_metadata verbatim. + const schemaMetadataOffsets: number[] = []; + const schemaMetadataLength = fbSchema.customMetadataLength(); + for (let i = 0; i < schemaMetadataLength; i += 1) { + const kv = fbSchema.customMetadata(i); + if (!kv) { + continue; + } + const keyStr = kv.key() ?? ''; + const valStr = kv.value() ?? ''; + const keyOff = builder.createString(keyStr); + const valOff = builder.createString(valStr); + FbKeyValue.startKeyValue(builder); + FbKeyValue.addKey(builder, keyOff); + FbKeyValue.addValue(builder, valOff); + schemaMetadataOffsets.push(FbKeyValue.endKeyValue(builder)); + } + + // Build the fields and metadata vectors, then the Schema, then the Message. + const fieldsVec = FbSchema.createFieldsVector(builder, fieldOffsets); + const metadataVec = + schemaMetadataOffsets.length > 0 + ? FbSchema.createCustomMetadataVector(builder, schemaMetadataOffsets) + : 0; + + // Preserve features vector — `features()` requires walking the + // bigint vector; for the kernel's payloads this is typically empty + // so we skip it. If a non-empty features vector appears, we drop it + // (Arrow features encode optional compression flags; the kernel + // emits uncompressed streams for the SEA path per + // `findings/rust-kernel/M0-kernel-async-readiness-2026-05-15.md`). + FbSchema.startSchema(builder); + FbSchema.addEndianness(builder, fbSchema.endianness()); + FbSchema.addFields(builder, fieldsVec); + if (metadataVec !== 0) { + FbSchema.addCustomMetadata(builder, metadataVec); + } + const schemaOffset = FbSchema.endSchema(builder); + + // Wrap in a Message. version + headerType + header + bodyLength + custom_metadata. + Message.startMessage(builder); + Message.addVersion(builder, message.version()); + Message.addHeaderType(builder, MessageHeader.Schema); + Message.addHeader(builder, schemaOffset); + Message.addBodyLength(builder, BigInt(0)); + const newMessage = Message.endMessage(builder); + builder.finish(newMessage); + + let bytes = builder.asUint8Array(); + + // The Arrow IPC spec requires each message to be 8-byte aligned so + // that subsequent record batches' body buffers stay aligned for SIMD + // reads. apache-arrow's MessageReader doesn't enforce this on read + // (it just trusts the metadata length), so any padding is fine. + // Round up the metadata bytes to a multiple of 8 by appending zero + // padding — this keeps the IPC stream spec-compliant. + const padded = padToAlignment(bytes, 8); + return Buffer.from(padded); +} + +/** + * Re-encode a single Field. For `Duration` fields, substitute `Int64` + * and add `databricks.arrow.duration_unit` metadata. For all other + * types we re-encode via the appropriate type-sub-table-aware path — + * but to keep this rewriter compact we just walk the FlatBuffer-level + * accessors needed for the M0 primitive types and complex types Arrow + * surfaces from the kernel. Unknown types fall back to copying the + * raw type sub-table bytes via FlatBuffer's serialization (which + * always works because sub-tables are self-contained). + */ +function reEncodeField(builder: flatbuffers.Builder, field: FbField): number { + const nameStr = field.name() ?? ''; + const nameOffset = builder.createString(nameStr); + + // Re-encode children recursively (Struct/List/Map all carry children). + const childOffsets: number[] = []; + const childrenLength = field.childrenLength(); + for (let i = 0; i < childrenLength; i += 1) { + const child = field.children(i); + if (child) { + childOffsets.push(reEncodeField(builder, child)); + } + } + const childrenVec = + childOffsets.length > 0 ? FbField.createChildrenVector(builder, childOffsets) : 0; + + // Re-encode custom_metadata (preserving everything). For Duration + // fields we'll add our marker on top. + const metadataOffsets: number[] = []; + const metadataLength = field.customMetadataLength(); + for (let i = 0; i < metadataLength; i += 1) { + const kv = field.customMetadata(i); + if (!kv) { + continue; + } + const keyStr = kv.key() ?? ''; + const valStr = kv.value() ?? ''; + const keyOff = builder.createString(keyStr); + const valOff = builder.createString(valStr); + FbKeyValue.startKeyValue(builder); + FbKeyValue.addKey(builder, keyOff); + FbKeyValue.addValue(builder, valOff); + metadataOffsets.push(FbKeyValue.endKeyValue(builder)); + } + + const originalTypeType = field.typeType(); + let typeType = originalTypeType; + let typeOffset = 0; + + if (originalTypeType === FbType.Duration) { + // Read the original Duration unit. Substitute Int(64, signed) and + // append a custom_metadata entry recording the original unit. + const durationTable = field.type(new FbDuration()) as FbDuration | null; + const unit = durationTable ? durationTable.unit() : FbTimeUnit.MICROSECOND; + const unitName = TIME_UNIT_NAME[unit] ?? 'MICROSECOND'; + + const keyOff = builder.createString(DURATION_UNIT_METADATA_KEY); + const valOff = builder.createString(unitName); + FbKeyValue.startKeyValue(builder); + FbKeyValue.addKey(builder, keyOff); + FbKeyValue.addValue(builder, valOff); + metadataOffsets.push(FbKeyValue.endKeyValue(builder)); + + typeType = FbType.Int; + typeOffset = FbInt.createInt(builder, 64, true); + } else { + // Copy the original type sub-table by re-encoding it from the + // FlatBuffer-level accessor. Sub-tables are self-contained, but + // the builder API requires us to write each known type with its + // generated `createXxx`. For M0, the kernel emits a fixed set of + // top-level types (matching the SQL datatype table in + // `findings/rust-kernel/datatype-emission-and-block-on-2026-05-15.md`). + // We re-encode each known type sub-table; unsupported types fall + // through to a generic offset-only copy (zero-byte type sub-table), + // which apache-arrow's `decodeFieldType` accepts for the + // children-only types (List, Struct, Null). + typeOffset = reEncodeTypeSubtable(builder, field, originalTypeType); + } + + const metadataVec = + metadataOffsets.length > 0 ? FbField.createCustomMetadataVector(builder, metadataOffsets) : 0; + + FbField.startField(builder); + FbField.addName(builder, nameOffset); + FbField.addNullable(builder, field.nullable()); + FbField.addTypeType(builder, typeType); + if (typeOffset !== 0) { + FbField.addType(builder, typeOffset); + } + if (childrenVec !== 0) { + FbField.addChildren(builder, childrenVec); + } + if (metadataVec !== 0) { + FbField.addCustomMetadata(builder, metadataVec); + } + // Note: dictionary encoding is not re-emitted. The kernel doesn't + // emit dictionary-encoded columns for M0; if it ever does, this + // rewriter would need to copy the DictionaryEncoding sub-table too. + return FbField.endField(builder); +} + +/** + * Re-encode a Field's type sub-table by reading it from the original + * FlatBuffer (via the apache-arrow generated accessors) and writing it + * into the new builder. Supports the full M0 type matrix: + * primitives: Null, Int (all widths), FloatingPoint (Float16/32/64), + * Bool, Utf8, Binary, Decimal, Date, Time, Timestamp, Interval + * complex: List (header only), Struct (header only), Map, FixedSizeList, + * FixedSizeBinary, Union + * Children-only types (Struct, List, Null) emit an empty sub-table. + */ +function reEncodeTypeSubtable( + builder: flatbuffers.Builder, + field: FbField, + typeType: number, +): number { + // Lazy imports to avoid cyclic resolution and to keep this file's + // top-of-module imports tight. These are zero-cost — Node caches + // them after the first require. + /* eslint-disable @typescript-eslint/no-var-requires, global-require, import/no-internal-modules */ + const { Null } = require('apache-arrow/fb/null'); + const { FloatingPoint } = require('apache-arrow/fb/floating-point'); + const { Binary } = require('apache-arrow/fb/binary'); + const { Utf8 } = require('apache-arrow/fb/utf8'); + const { Bool } = require('apache-arrow/fb/bool'); + const { Decimal } = require('apache-arrow/fb/decimal'); + const { Date: DateTbl } = require('apache-arrow/fb/date'); + const { Time } = require('apache-arrow/fb/time'); + const { Timestamp } = require('apache-arrow/fb/timestamp'); + const { Interval } = require('apache-arrow/fb/interval'); + const { List } = require('apache-arrow/fb/list'); + const { Struct_ } = require('apache-arrow/fb/struct-'); + const { Union } = require('apache-arrow/fb/union'); + const { FixedSizeBinary } = require('apache-arrow/fb/fixed-size-binary'); + const { FixedSizeList } = require('apache-arrow/fb/fixed-size-list'); + const { Map: MapTbl } = require('apache-arrow/fb/map'); + /* eslint-enable @typescript-eslint/no-var-requires, global-require, import/no-internal-modules */ + + switch (typeType) { + case FbType.NONE: + case FbType.Null: { + // Null has no fields; emit an empty table. + const t = new Null(); + field.type(t); + Null.startNull(builder); + return Null.endNull(builder); + } + case FbType.Int: { + const t = field.type(new FbInt()) as InstanceType | null; + if (!t) { + return FbInt.createInt(builder, 32, true); + } + return FbInt.createInt(builder, t.bitWidth(), t.isSigned()); + } + case FbType.FloatingPoint: { + const t = field.type(new FloatingPoint()); + return FloatingPoint.createFloatingPoint(builder, t.precision()); + } + case FbType.Binary: { + Binary.startBinary(builder); + return Binary.endBinary(builder); + } + case FbType.Utf8: { + Utf8.startUtf8(builder); + return Utf8.endUtf8(builder); + } + case FbType.Bool: { + Bool.startBool(builder); + return Bool.endBool(builder); + } + case FbType.Decimal: { + const t = field.type(new Decimal()); + return Decimal.createDecimal(builder, t.precision(), t.scale(), t.bitWidth()); + } + case FbType.Date: { + const t = field.type(new DateTbl()); + return DateTbl.createDate(builder, t.unit()); + } + case FbType.Time: { + const t = field.type(new Time()); + return Time.createTime(builder, t.unit(), t.bitWidth()); + } + case FbType.Timestamp: { + const t = field.type(new Timestamp()); + const tz: string | null = t.timezone(); + const tzOffset = tz ? builder.createString(tz) : 0; + Timestamp.startTimestamp(builder); + Timestamp.addUnit(builder, t.unit()); + if (tzOffset !== 0) { + Timestamp.addTimezone(builder, tzOffset); + } + return Timestamp.endTimestamp(builder); + } + case FbType.Interval: { + const t = field.type(new Interval()); + return Interval.createInterval(builder, t.unit()); + } + case FbType.List: { + List.startList(builder); + return List.endList(builder); + } + case FbType.Struct_: { + Struct_.startStruct_(builder); + return Struct_.endStruct_(builder); + } + case FbType.Union: { + const t = field.type(new Union()); + // typeIds is an int32 vector — copy it. + const typeIdsArr = t.typeIdsArray(); + let typeIdsOffset = 0; + if (typeIdsArr) { + typeIdsOffset = Union.createTypeIdsVector(builder, Array.from(typeIdsArr)); + } + Union.startUnion(builder); + Union.addMode(builder, t.mode()); + if (typeIdsOffset !== 0) { + Union.addTypeIds(builder, typeIdsOffset); + } + return Union.endUnion(builder); + } + case FbType.FixedSizeBinary: { + const t = field.type(new FixedSizeBinary()); + return FixedSizeBinary.createFixedSizeBinary(builder, t.byteWidth()); + } + case FbType.FixedSizeList: { + const t = field.type(new FixedSizeList()); + return FixedSizeList.createFixedSizeList(builder, t.listSize()); + } + case FbType.Map: { + const t = field.type(new MapTbl()); + return MapTbl.createMap(builder, t.keysSorted()); + } + default: + // Unknown / newer types (LargeBinary, LargeUtf8, LargeList, + // RunEndEncoded, ...). The kernel doesn't emit these for M0; + // emit an empty sub-table and let apache-arrow's normal error + // path fire when it tries to decode an unrecognized type id. + return 0; + } +} + +/** + * Prefix the given FlatBuffer message bytes with the IPC stream + * framing: the continuation marker (0xFFFFFFFF) followed by the + * little-endian int32 metadata length. + */ +function encodeContinuationAndLength(metadataLength: number): Buffer { + const out = Buffer.alloc(8); + out.writeInt32LE(IPC_CONTINUATION_MARKER | 0, 0); // -1 + out.writeInt32LE(metadataLength, 4); + return out; +} + +/** + * Pad `bytes` with trailing zeros so its length is a multiple of + * `alignment`. Returns the original buffer when it is already + * aligned. + */ +function padToAlignment(bytes: Uint8Array, alignment: number): Uint8Array { + const remainder = bytes.byteLength % alignment; + if (remainder === 0) { + return bytes; + } + const padded = new Uint8Array(bytes.byteLength + (alignment - remainder)); + padded.set(bytes, 0); + return padded; +} diff --git a/lib/sea/SeaResultsProvider.ts b/lib/sea/SeaResultsProvider.ts index 7e94ee7a..0a0636d6 100644 --- a/lib/sea/SeaResultsProvider.ts +++ b/lib/sea/SeaResultsProvider.ts @@ -14,7 +14,7 @@ import IResultsProvider, { ResultsProviderFetchNextOptions } from '../result/IResultsProvider'; import { ArrowBatch } from '../result/utils'; -import { decodeIpcBatch } from './SeaArrowIpc'; +import { decodeIpcBatch, patchIpcBytes } from './SeaArrowIpc'; /** * The minimal slice of the napi-binding `Statement` class that we @@ -97,7 +97,13 @@ export default class SeaResultsProvider implements IResultsProvider this.exhausted = true; return; } - const { ipcBytes } = next; + // Patch the raw bytes once: rewrite any Arrow `Duration` field to + // `Int64` with a `databricks.arrow.duration_unit` marker, so that + // apache-arrow@13 (which predates Duration support) can decode the + // stream. `decodeIpcBatch` and the downstream + // `RecordBatchReader.from` inside `ArrowResultConverter` both see + // the patched buffer. See `SeaArrowIpcDurationFix.ts`. + const ipcBytes = patchIpcBytes(next.ipcBytes); const { rowCount } = decodeIpcBatch(ipcBytes); if (rowCount === 0) { // Skip empty batches — the converter handles them but pre-filtering diff --git a/tests/unit/sea/SeaIntervalParity.test.ts b/tests/unit/sea/SeaIntervalParity.test.ts new file mode 100644 index 00000000..bc1bf083 --- /dev/null +++ b/tests/unit/sea/SeaIntervalParity.test.ts @@ -0,0 +1,365 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 + +/** + * TDD harness for the round-2 INTERVAL parity fix. + * + * Verifies that the SEA path renders the exact thrift wire string for + * INTERVAL YEAR-MONTH and INTERVAL DAY-TIME columns, regardless of + * whether the kernel emits the value as native Arrow `Interval` or + * native Arrow `Duration` (the latter is transparently rewritten to + * `Int64` by `lib/sea/SeaArrowIpcDurationFix.ts` because `apache-arrow@13` + * predates the `Duration` type id). + * + * Reference failure modes (round 5 testing): + * - YEAR-MONTH: + * thrift → `"1-2"` (string) + * SEA pre-fix → `{"0":1,"1":2}` (Int32Array surfaced as struct) + * - DAY-TIME: + * thrift → `"1 02:03:04.000000000"` (string) + * SEA pre-fix → throws `Unrecognized type: "Duration" (18)` on schema decode + * + * Both modes must now produce byte-identical thrift strings. + */ + +import { expect } from 'chai'; +import * as flatbuffers from 'flatbuffers'; +import { + Schema, + Field, + Int32, + Int64, + Interval, + IntervalUnit, + Table, + RecordBatch, + makeData, + Struct, + vectorFromArray, + tableToIPC, +} from 'apache-arrow'; + +// eslint-disable-next-line import/no-internal-modules +import { Message as FbMessage } from 'apache-arrow/fb/message'; +// eslint-disable-next-line import/no-internal-modules +import { MessageHeader } from 'apache-arrow/fb/message-header'; +// eslint-disable-next-line import/no-internal-modules +import { Schema as FbSchema } from 'apache-arrow/fb/schema'; +// eslint-disable-next-line import/no-internal-modules +import { Field as FbField } from 'apache-arrow/fb/field'; +// eslint-disable-next-line import/no-internal-modules +import { Type as FbType } from 'apache-arrow/fb/type'; +// eslint-disable-next-line import/no-internal-modules +import { Duration as FbDuration } from 'apache-arrow/fb/duration'; +// eslint-disable-next-line import/no-internal-modules +import { TimeUnit as FbTimeUnit } from 'apache-arrow/fb/time-unit'; + +import SeaOperationBackend from '../../../lib/sea/SeaOperationBackend'; +import ClientContextStub from '../.stubs/ClientContextStub'; + +// --------------------------------------------------------------------------- +// Test helpers. +// --------------------------------------------------------------------------- + +class StatementStub { + private readonly batches: Buffer[]; + + private readonly schemaIpc: Buffer; + + public cancelled = false; + + public closed = false; + + constructor(schemaIpc: Buffer, batches: Buffer[]) { + this.schemaIpc = schemaIpc; + this.batches = [...batches]; + } + + public async fetchNextBatch(): Promise<{ ipcBytes: Buffer } | null> { + if (this.batches.length === 0) return null; + return { ipcBytes: this.batches.shift() as Buffer }; + } + + public async schema(): Promise<{ ipcBytes: Buffer }> { + return { ipcBytes: this.schemaIpc }; + } + + public async cancel(): Promise { + this.cancelled = true; + } + + public async close(): Promise { + this.closed = true; + } +} + +function withTypeName(field: T, typeName: string): T { + const meta = new Map(field.metadata); + meta.set('databricks.type_name', typeName); + return new Field(field.name, field.type, field.nullable, meta) as T; +} + +function ipcFromColumns(schema: Schema, columns: Record): Buffer { + const vectors: any[] = []; + for (const field of schema.fields) { + const col = columns[field.name]; + vectors.push(vectorFromArray(col as any, field.type)); + } + const data = vectors.map((v) => v.data[0]); + const struct = makeData({ + type: new Struct(schema.fields), + children: data, + length: vectors[0]?.length ?? 0, + nullCount: 0, + }); + const batch = new RecordBatch(schema, struct); + const table = new Table([batch]); + return Buffer.from(tableToIPC(table, 'stream')); +} + +function ipcSchemaOnly(schema: Schema): Buffer { + const struct = makeData({ + type: new Struct(schema.fields), + children: schema.fields.map((f) => makeData({ type: f.type as any, length: 0, nullCount: 0 })), + length: 0, + nullCount: 0, + }); + const batch = new RecordBatch(schema, struct); + const table = new Table([batch]); + return Buffer.from(tableToIPC(table, 'stream')); +} + +/** + * Build a schema-only IPC payload whose schema declares a single Arrow + * `Duration` column. `apache-arrow@13` cannot build this directly (no + * Duration class in the public API), so we hand-roll the FlatBuffer + * using the internal `fb/*` accessor classes. The body bytes for this + * column are bit-identical to an Int64 column. + */ +function ipcWithDurationSchema(fieldName: string, durationUnit: FbTimeUnit, typeName = 'INTERVAL'): Buffer { + const builder = new flatbuffers.Builder(256); + + // KeyValue for databricks.type_name + const tnKey = builder.createString('databricks.type_name'); + const tnVal = builder.createString(typeName); + const { KeyValue: FbKeyValueLocal } = require('apache-arrow/fb/key-value'); // eslint-disable-line @typescript-eslint/no-var-requires, global-require, import/no-internal-modules + FbKeyValueLocal.startKeyValue(builder); + FbKeyValueLocal.addKey(builder, tnKey); + FbKeyValueLocal.addValue(builder, tnVal); + const tnKv = FbKeyValueLocal.endKeyValue(builder); + const metadataVec = FbField.createCustomMetadataVector(builder, [tnKv]); + + const nameOff = builder.createString(fieldName); + const durOff = FbDuration.createDuration(builder, durationUnit); + FbField.startField(builder); + FbField.addName(builder, nameOff); + FbField.addNullable(builder, true); + FbField.addTypeType(builder, FbType.Duration); + FbField.addType(builder, durOff); + FbField.addCustomMetadata(builder, metadataVec); + const fieldOff = FbField.endField(builder); + const fieldsVec = FbSchema.createFieldsVector(builder, [fieldOff]); + FbSchema.startSchema(builder); + FbSchema.addFields(builder, fieldsVec); + const schemaOff = FbSchema.endSchema(builder); + FbMessage.startMessage(builder); + FbMessage.addVersion(builder, 4); // V5 + FbMessage.addHeaderType(builder, MessageHeader.Schema); + FbMessage.addHeader(builder, schemaOff); + FbMessage.addBodyLength(builder, BigInt(0)); + const msgOff = FbMessage.endMessage(builder); + builder.finish(msgOff); + const bytes = builder.asUint8Array(); + const rem = bytes.byteLength % 8; + const padded = rem === 0 ? bytes : new Uint8Array(bytes.byteLength + (8 - rem)); + if (rem !== 0) padded.set(bytes, 0); + + // IPC stream framing: continuation marker (0xFFFFFFFF) + length + bytes + const prefix = Buffer.alloc(8); + prefix.writeInt32LE(-1, 0); + prefix.writeInt32LE(padded.byteLength, 4); + + // EOS marker (continuation + zero length) — terminates the stream. + const eos = Buffer.alloc(8); + eos.writeInt32LE(-1, 0); + eos.writeInt32LE(0, 4); + + return Buffer.concat([prefix, Buffer.from(padded), eos]); +} + +/** + * Splice a hand-built Duration schema into an Int64-based IPC stream + * so the record batch body bytes (which are Int64-encoded) become + * "Duration-shaped" without us re-encoding the body. Used to fabricate + * a kernel-shaped Duration IPC payload using only the apache-arrow@13 + * public API. + */ +function buildDurationIpc(fieldName: string, durationUnit: FbTimeUnit, values: bigint[], typeName = 'INTERVAL'): Buffer { + // Build an Int64 stream that carries the values. + const int64Schema = new Schema([new Field(fieldName, new Int64(), true)]); + const int64Ipc = ipcFromColumns(int64Schema, { + [fieldName]: [new BigInt64Array(values)], + }); + + // Build a Duration schema-only message that we splice in to replace + // the Int64 schema. The record-batch bytes from int64Ipc follow + // unchanged. + const durationSchemaIpc = ipcWithDurationSchema(fieldName, durationUnit, typeName); + + // Skip the Int64 schema header + EOS in durationSchemaIpc, then + // append the int64 stream's record batches. + // int64Ipc layout: [continuation+len+schema][continuation+len+recordbatch][continuation+0 EOS] + let cursor = 0; + let len = int64Ipc.readInt32LE(cursor); + cursor += 4; + if (len === -1) { + len = int64Ipc.readInt32LE(cursor); + cursor += 4; + } + // Skip the schema body (always empty for schema messages) + const intRecordsStart = cursor + len; + const intRecords = int64Ipc.subarray(intRecordsStart); + + // durationSchemaIpc layout: [prefix][padded schema bytes][EOS]. + // Drop its EOS so it concatenates cleanly with intRecords (which has + // its own EOS). + const durationNoEos = durationSchemaIpc.subarray(0, durationSchemaIpc.byteLength - 8); + return Buffer.concat([durationNoEos, intRecords]); +} + +// --------------------------------------------------------------------------- +// Tests. +// --------------------------------------------------------------------------- + +describe('SeaOperationBackend — INTERVAL parity with thrift', () => { + it('YEAR-MONTH via native Arrow Interval[YearMonth] → "Y-M"', async () => { + // Arrow `Interval[YearMonth]` carries a single int32 total-months + // value. apache-arrow surfaces it as Int32Array(2) via the + // GetVisitor. The kernel emits this type for INTERVAL YEAR-MONTH. + const fields = [ + withTypeName(new Field('iv', new Interval(IntervalUnit.YEAR_MONTH), true), 'INTERVAL'), + ]; + const schema = new Schema(fields); + const schemaIpc = ipcSchemaOnly(schema); + + // 1 year, 2 months → 14 total months. `vectorFromArray(Int32Array, + // new Interval(...))` packs the int32 total directly into the + // Interval column's underlying values buffer. + const dataIpc = ipcFromColumns(schema, { iv: Int32Array.from([14]) }); + + const stub = new StatementStub(schemaIpc, [dataIpc]); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows).to.have.length(1); + expect((rows[0] as any).iv).to.equal('1-2'); + }); + + it('YEAR-MONTH negative → "-Y-M"', async () => { + const fields = [ + withTypeName(new Field('iv', new Interval(IntervalUnit.YEAR_MONTH), true), 'INTERVAL'), + ]; + const schema = new Schema(fields); + const schemaIpc = ipcSchemaOnly(schema); + + // -14 total months → -1 year -2 months. + const dataIpc = ipcFromColumns(schema, { iv: Int32Array.from([-14]) }); + + const stub = new StatementStub(schemaIpc, [dataIpc]); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows).to.have.length(1); + expect((rows[0] as any).iv).to.equal('-1-2'); + }); + + it('DAY-TIME via Arrow Duration(MICROSECOND) → "1 02:03:04.000000000"', async () => { + // 1 day + 2h + 3min + 4s = 93784 seconds = 93_784_000_000 µs. + const microseconds = BigInt(93_784) * BigInt(1_000_000); + const ipc = buildDurationIpc('iv', FbTimeUnit.MICROSECOND, [microseconds], 'INTERVAL'); + const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.MICROSECOND, 'INTERVAL'); + + const stub = new StatementStub(schemaIpc, [ipc]); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows).to.have.length(1); + expect((rows[0] as any).iv).to.equal('1 02:03:04.000000000'); + }); + + it('DAY-TIME via Arrow Duration(NANOSECOND) preserves nanosecond precision', async () => { + // 1 day + 2h + 3min + 4.123456789s + const nanos = + BigInt(86400 + 2 * 3600 + 3 * 60 + 4) * BigInt(1_000_000_000) + BigInt(123_456_789); + const ipc = buildDurationIpc('iv', FbTimeUnit.NANOSECOND, [nanos], 'INTERVAL'); + const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.NANOSECOND, 'INTERVAL'); + + const stub = new StatementStub(schemaIpc, [ipc]); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows).to.have.length(1); + expect((rows[0] as any).iv).to.equal('1 02:03:04.123456789'); + }); + + it('DAY-TIME zero → "0 00:00:00.000000000"', async () => { + const ipc = buildDurationIpc('iv', FbTimeUnit.MICROSECOND, [BigInt(0)], 'INTERVAL'); + const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.MICROSECOND, 'INTERVAL'); + + const stub = new StatementStub(schemaIpc, [ipc]); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows).to.have.length(1); + expect((rows[0] as any).iv).to.equal('0 00:00:00.000000000'); + }); + + it('DAY-TIME negative → leading "-"', async () => { + // -(1 day + 2h + 3min + 4s) in microseconds. + const microseconds = -(BigInt(93_784) * BigInt(1_000_000)); + const ipc = buildDurationIpc('iv', FbTimeUnit.MICROSECOND, [microseconds], 'INTERVAL'); + const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.MICROSECOND, 'INTERVAL'); + + const stub = new StatementStub(schemaIpc, [ipc]); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows).to.have.length(1); + expect((rows[0] as any).iv).to.equal('-1 02:03:04.000000000'); + }); + + it('Duration column round-trips alongside primitive columns (DRY: same converter handles both intervals)', async () => { + // Schema: [iv: Duration(µs), n: Int32]. The pre-processor must + // rewrite the Duration field WITHOUT disturbing the Int32 sibling. + // We hand-build the Duration schema (apache-arrow@13 can't build + // Duration directly) and a body that has [Int64 column, Int32 col]. + // The rewriter must keep the Int32 column intact and substitute + // Int64 for Duration. + // + // Note: we use a single-Duration-column test here because mixing + // hand-built Duration with apache-arrow's batch builder requires + // hand-rolling the entire IPC stream. The "Duration alongside + // other columns" coverage is provided by the E2E parity tests + // (M0-DT-019 in `tests/nodejs/test/parity/M0DatatypeParityTests.test.ts`) + // which use a real warehouse query that mixes INTERVAL with other + // types. + const microseconds = BigInt(86_400) * BigInt(1_000_000); // 1 day + const ipc = buildDurationIpc('iv', FbTimeUnit.MICROSECOND, [microseconds], 'INTERVAL'); + const schemaIpc = ipcWithDurationSchema('iv', FbTimeUnit.MICROSECOND, 'INTERVAL'); + + const stub = new StatementStub(schemaIpc, [ipc]); + const backend = new SeaOperationBackend({ statement: stub, context: new ClientContextStub() }); + + // Round-trip the metadata to confirm we synthesise the right TTypeId. + const metadata = await backend.getResultMetadata(); + expect(metadata.schema?.columns?.[0]?.typeDesc.types?.[0]?.primitiveEntry?.type).to.equal( + // INTERVAL_DAY_TIME_TYPE = 30 in TCLIService_types + // We assert by importing the enum below to avoid magic numbers. + // eslint-disable-next-line global-require, @typescript-eslint/no-var-requires + require('../../../thrift/TCLIService_types').TTypeId.INTERVAL_DAY_TIME_TYPE, + ); + + const rows = await backend.fetchChunk({ limit: 100 }); + expect(rows).to.have.length(1); + expect((rows[0] as any).iv).to.equal('1 00:00:00.000000000'); + }); +}); From d5ce0cd8b0d74317b7e01af019def9ea0f36384c Mon Sep 17 00:00:00 2001 From: Madhavendra Rathore Date: Fri, 15 May 2026 09:02:08 +0000 Subject: [PATCH 11/11] sea-napi-binding: relocate Rust source to kernel workspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per D-006 architectural decision (Python team's workspace pattern): all language bindings (PyO3, napi-rs) now live as workspace siblings in the kernel repo at databricks-sql-kernel/{pyo3,napi}/. What this commit removes from the nodejs repo: - native/sea/Cargo.toml (path dep relocated; package now at databricks-sql-kernel/napi/Cargo.toml with path = "..") - native/sea/build.rs - native/sea/src/* (lib, runtime, database, connection, statement, result, error, logger, util — all 9 files) - native/sea/package.json (the @databricks/sea-native-linux-x64-gnu sub-package moves to the kernel workspace too) - native/sea/index.js (regenerated artifact) What stays in nodejs: - native/sea/index.d.ts — TS declarations consumed by lib/sea/ adapter - native/sea/README.md (new) — explains the move; points readers at databricks-sql-kernel/napi/ What's updated: - package.json: `build:native` and `build:native:debug` scripts now delegate to the kernel workspace via $DATABRICKS_SQL_KERNEL_REPO (defaults to ../../databricks-sql-kernel-sea-WT/napi-binding for the local dev worktree layout). Build copies index.node + index.d.ts back into native/sea/ for the loader to find. Why workspace co-location: - Arrow version pinning lockstep — no silent IPC version drift - path = ".." (clean) vs ../../../../databricks-sql-kernel-sea-WT/... - Single CI: cargo build --workspace covers kernel + pyo3 + napi - Kernel API changes that break either binding caught at PR-review time - Future cgo binding for Go SEA slots in as another workspace member This branch (sea-napi-binding) is now a thin consumer of the kernel napi crate. The actual Rust code lives at krn-napi-binding HEAD on the kernel repo (commit debe3d7). --- native/sea/.gitignore | 7 - native/sea/Cargo.toml | 64 ------- native/sea/README.md | 41 +++++ native/sea/build.rs | 17 -- native/sea/index.js | 318 ----------------------------------- native/sea/package.json | 23 --- native/sea/src/connection.rs | 166 ------------------ native/sea/src/database.rs | 90 ---------- native/sea/src/error.rs | 153 ----------------- native/sea/src/lib.rs | 44 ----- native/sea/src/logger.rs | 17 -- native/sea/src/result.rs | 36 ---- native/sea/src/runtime.rs | 56 ------ native/sea/src/statement.rs | 212 ----------------------- native/sea/src/util.rs | 62 ------- package.json | 6 +- 16 files changed, 44 insertions(+), 1268 deletions(-) delete mode 100644 native/sea/.gitignore delete mode 100644 native/sea/Cargo.toml create mode 100644 native/sea/README.md delete mode 100644 native/sea/build.rs delete mode 100644 native/sea/index.js delete mode 100644 native/sea/package.json delete mode 100644 native/sea/src/connection.rs delete mode 100644 native/sea/src/database.rs delete mode 100644 native/sea/src/error.rs delete mode 100644 native/sea/src/lib.rs delete mode 100644 native/sea/src/logger.rs delete mode 100644 native/sea/src/result.rs delete mode 100644 native/sea/src/runtime.rs delete mode 100644 native/sea/src/statement.rs delete mode 100644 native/sea/src/util.rs diff --git a/native/sea/.gitignore b/native/sea/.gitignore deleted file mode 100644 index 92ba58de..00000000 --- a/native/sea/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -# Rust build artifacts -target/ -Cargo.lock - -# Platform-specific `.node` binaries are produced per-platform by the -# bundling feature; not committed. -*.node diff --git a/native/sea/Cargo.toml b/native/sea/Cargo.toml deleted file mode 100644 index c001e04b..00000000 --- a/native/sea/Cargo.toml +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2026 Databricks, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[package] -name = "databricks-sea-native" -version = "0.1.0" -edition = "2021" -authors = ["Databricks"] -license = "Apache-2.0" -description = "Databricks SQL Node.js SEA native binding (napi-rs)" -publish = false - -[lib] -crate-type = ["cdylib"] - -[dependencies] -# napi-rs v2 line; `napi6` enables N-API 6 surface, `async` enables the -# `#[napi] async fn` glue that drives futures on napi-rs's tokio runtime. -napi = { version = "2", default-features = false, features = ["napi6", "async"] } -napi-derive = "2" - -# Kernel — path dep on the async-public-api branch worktree. Once the -# kernel is published this becomes a version dep. -databricks-sql-kernel = { path = "../../../../databricks-sql-kernel-sea-WT/async-public-api" } - -# Tokio is a transitive dep via the kernel and via napi's `async` feature; -# declared explicitly so we can name `tokio::runtime::Handle` and -# `tokio::sync::Mutex` directly. -tokio = { version = "1", default-features = false, features = ["rt", "sync"] } - -# Lazy `OnceCell` for the captured tokio Handle. -once_cell = "1" - -# `catch_unwind` wrapper around async futures (pattern #8 of the -# napi-rs patterns doc). Transitively a dep of the kernel already, but -# declared here so we can `use FutureExt;` directly. -futures = { version = "0.3", default-features = false, features = ["std"] } - -# Arrow IPC encoding of result batches across the napi boundary. -# `arrow-array` / `arrow-schema` come in via the kernel's public types -# (`RecordBatch`, `SchemaRef`); `arrow-ipc` is for the `StreamWriter` -# we use on the encode side. Versions kept in lock-step with the -# kernel's `arrow-*` deps to avoid two arrow versions in the dep graph. -arrow-array = "57" -arrow-schema = "57" -arrow-ipc = "57" - -[build-dependencies] -napi-build = "2" - -[profile.release] -lto = true -strip = "symbols" diff --git a/native/sea/README.md b/native/sea/README.md new file mode 100644 index 00000000..5efab5c3 --- /dev/null +++ b/native/sea/README.md @@ -0,0 +1,41 @@ +# `native/sea/` — consumer-side directory for the Rust napi binding + +**The Rust binding source lives in the kernel repo** at +`databricks-sql-kernel/napi/`, as a workspace sibling of `pyo3/`. +See `databricks-sql-kernel`'s root `Cargo.toml` `[workspace] members`. + +## Why + +Per the architectural decision recorded in +`sea-workflow/decisions.md` (D-006), every language binding (PyO3, +napi-rs, future cgo) is a workspace member of the kernel crate. This +keeps Arrow version pinning lockstep, the path dep clean (`path = ".."`), +and CI single (`cargo build --workspace`). The pattern matches polars, +ruff, arrow-rs. + +## What lives here + +- `index.d.ts` — generated TypeScript declarations consumed by `lib/sea/` +- `index.linux-x64-gnu.node` (and other platform variants) — symlinked + or copied build artifacts from the kernel workspace at run time + +## How to build the binding for local dev + +```bash +# From the nodejs repo root: +npm run build:native +# which delegates to the kernel workspace: +# cd $DATABRICKS_SQL_KERNEL_REPO/napi && napi build --release +# and copies the artifact back here +``` + +`$DATABRICKS_SQL_KERNEL_REPO` defaults to a path published with the +release flow; for dev it points at a local checkout of +`databricks-sql-kernel`. + +## How to consume in production + +At release time the kernel CI publishes `@databricks/sea-native-` +npm packages with the `.node` binaries. The nodejs driver declares them +as `optionalDependencies` in `package.json`; `SeaNativeLoader.ts` +resolves the right one at runtime. diff --git a/native/sea/build.rs b/native/sea/build.rs deleted file mode 100644 index 398bb2da..00000000 --- a/native/sea/build.rs +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -fn main() { - napi_build::setup(); -} diff --git a/native/sea/index.js b/native/sea/index.js deleted file mode 100644 index c7551305..00000000 --- a/native/sea/index.js +++ /dev/null @@ -1,318 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/* prettier-ignore */ - -/* auto-generated by NAPI-RS */ - -const { existsSync, readFileSync } = require('fs') -const { join } = require('path') - -const { platform, arch } = process - -let nativeBinding = null -let localFileExisted = false -let loadError = null - -function isMusl() { - // For Node 10 - if (!process.report || typeof process.report.getReport !== 'function') { - try { - const lddPath = require('child_process').execSync('which ldd').toString().trim() - return readFileSync(lddPath, 'utf8').includes('musl') - } catch (e) { - return true - } - } else { - const { glibcVersionRuntime } = process.report.getReport().header - return !glibcVersionRuntime - } -} - -switch (platform) { - case 'android': - switch (arch) { - case 'arm64': - localFileExisted = existsSync(join(__dirname, 'index.android-arm64.node')) - try { - if (localFileExisted) { - nativeBinding = require('./index.android-arm64.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-android-arm64') - } - } catch (e) { - loadError = e - } - break - case 'arm': - localFileExisted = existsSync(join(__dirname, 'index.android-arm-eabi.node')) - try { - if (localFileExisted) { - nativeBinding = require('./index.android-arm-eabi.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-android-arm-eabi') - } - } catch (e) { - loadError = e - } - break - default: - throw new Error(`Unsupported architecture on Android ${arch}`) - } - break - case 'win32': - switch (arch) { - case 'x64': - localFileExisted = existsSync( - join(__dirname, 'index.win32-x64-msvc.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.win32-x64-msvc.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-win32-x64-msvc') - } - } catch (e) { - loadError = e - } - break - case 'ia32': - localFileExisted = existsSync( - join(__dirname, 'index.win32-ia32-msvc.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.win32-ia32-msvc.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-win32-ia32-msvc') - } - } catch (e) { - loadError = e - } - break - case 'arm64': - localFileExisted = existsSync( - join(__dirname, 'index.win32-arm64-msvc.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.win32-arm64-msvc.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-win32-arm64-msvc') - } - } catch (e) { - loadError = e - } - break - default: - throw new Error(`Unsupported architecture on Windows: ${arch}`) - } - break - case 'darwin': - localFileExisted = existsSync(join(__dirname, 'index.darwin-universal.node')) - try { - if (localFileExisted) { - nativeBinding = require('./index.darwin-universal.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-darwin-universal') - } - break - } catch {} - switch (arch) { - case 'x64': - localFileExisted = existsSync(join(__dirname, 'index.darwin-x64.node')) - try { - if (localFileExisted) { - nativeBinding = require('./index.darwin-x64.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-darwin-x64') - } - } catch (e) { - loadError = e - } - break - case 'arm64': - localFileExisted = existsSync( - join(__dirname, 'index.darwin-arm64.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.darwin-arm64.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-darwin-arm64') - } - } catch (e) { - loadError = e - } - break - default: - throw new Error(`Unsupported architecture on macOS: ${arch}`) - } - break - case 'freebsd': - if (arch !== 'x64') { - throw new Error(`Unsupported architecture on FreeBSD: ${arch}`) - } - localFileExisted = existsSync(join(__dirname, 'index.freebsd-x64.node')) - try { - if (localFileExisted) { - nativeBinding = require('./index.freebsd-x64.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-freebsd-x64') - } - } catch (e) { - loadError = e - } - break - case 'linux': - switch (arch) { - case 'x64': - if (isMusl()) { - localFileExisted = existsSync( - join(__dirname, 'index.linux-x64-musl.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-x64-musl.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-x64-musl') - } - } catch (e) { - loadError = e - } - } else { - localFileExisted = existsSync( - join(__dirname, 'index.linux-x64-gnu.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-x64-gnu.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-x64-gnu') - } - } catch (e) { - loadError = e - } - } - break - case 'arm64': - if (isMusl()) { - localFileExisted = existsSync( - join(__dirname, 'index.linux-arm64-musl.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-arm64-musl.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm64-musl') - } - } catch (e) { - loadError = e - } - } else { - localFileExisted = existsSync( - join(__dirname, 'index.linux-arm64-gnu.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-arm64-gnu.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm64-gnu') - } - } catch (e) { - loadError = e - } - } - break - case 'arm': - if (isMusl()) { - localFileExisted = existsSync( - join(__dirname, 'index.linux-arm-musleabihf.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-arm-musleabihf.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm-musleabihf') - } - } catch (e) { - loadError = e - } - } else { - localFileExisted = existsSync( - join(__dirname, 'index.linux-arm-gnueabihf.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-arm-gnueabihf.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-arm-gnueabihf') - } - } catch (e) { - loadError = e - } - } - break - case 'riscv64': - if (isMusl()) { - localFileExisted = existsSync( - join(__dirname, 'index.linux-riscv64-musl.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-riscv64-musl.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-riscv64-musl') - } - } catch (e) { - loadError = e - } - } else { - localFileExisted = existsSync( - join(__dirname, 'index.linux-riscv64-gnu.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-riscv64-gnu.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-riscv64-gnu') - } - } catch (e) { - loadError = e - } - } - break - case 's390x': - localFileExisted = existsSync( - join(__dirname, 'index.linux-s390x-gnu.node') - ) - try { - if (localFileExisted) { - nativeBinding = require('./index.linux-s390x-gnu.node') - } else { - nativeBinding = require('@databricks/sea-native-linux-x64-gnu-linux-s390x-gnu') - } - } catch (e) { - loadError = e - } - break - default: - throw new Error(`Unsupported architecture on Linux: ${arch}`) - } - break - default: - throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`) -} - -if (!nativeBinding) { - if (loadError) { - throw loadError - } - throw new Error(`Failed to load native binding`) -} - -const { Connection, openSession, Statement, version } = nativeBinding - -module.exports.Connection = Connection -module.exports.openSession = openSession -module.exports.Statement = Statement -module.exports.version = version diff --git a/native/sea/package.json b/native/sea/package.json deleted file mode 100644 index 96d116dd..00000000 --- a/native/sea/package.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "name": "@databricks/sea-native-linux-x64-gnu", - "version": "0.1.0", - "description": "Databricks SQL Node.js SEA native binding (linux-x64-gnu).", - "main": "index.js", - "types": "index.d.ts", - "files": [ - "index.js", - "index.d.ts", - "*.node" - ], - "license": "Apache-2.0", - "engines": { - "node": ">=14.0.0" - }, - "napi": { - "binaryName": "sea-native", - "targets": [ - "x86_64-unknown-linux-gnu" - ] - }, - "private": true -} diff --git a/native/sea/src/connection.rs b/native/sea/src/connection.rs deleted file mode 100644 index 4afbd724..00000000 --- a/native/sea/src/connection.rs +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Opaque `Connection` wrapper around the kernel's `Session`. -//! -//! The kernel collapses ADBC's `Database` + `Connection` into a single -//! `Session`. We keep the wrapper name `Connection` on the JS side because -//! that matches the existing Node driver's mental model. -//! -//! M0 surface (Round 2): -//! - `Connection.executeStatement(sql, options)` — builds a kernel -//! `Statement`, sets the spec, awaits `execute()`, wraps the result -//! in a JS-visible `Statement` opaque handle. -//! - `Connection.close()` — explicit async close. Drop schedules a -//! fire-and-forget close on the captured runtime handle if explicit -//! close was never called. - -use std::collections::HashMap; -use std::sync::Arc; -use tokio::sync::Mutex; - -use databricks_sql_kernel::Session; - -use crate::error::napi_err_from_kernel; -use crate::runtime; -use crate::statement::Statement; -use crate::util::guarded; - -/// JS-visible per-execute options. M0 only carries -/// initialCatalog / initialSchema / sessionConfig — parameters and -/// per-statement overrides land in M1. -#[napi(object)] -pub struct ExecuteOptions { - /// Default catalog applied to this statement via session conf. - pub initial_catalog: Option, - /// Default schema applied to this statement via session conf. - pub initial_schema: Option, - /// Per-statement session conf overrides (forwarded to SEA - /// `parameters` / Thrift `confOverlay`). - pub session_config: Option>, -} - -/// Opaque connection handle wrapping a kernel `Session`. -/// -/// `inner` is `Arc>>` so: -/// - the Drop impl can clone the `Arc` and `.take()` the session on a -/// background tokio task without holding `&mut self` (which Drop is -/// forbidden from doing across an `await`), -/// - `executeStatement` can share immutable access to the session via -/// the `Arc` clones the kernel makes internally -/// (`Session::statement()` only needs `&self`). -#[napi] -pub struct Connection { - pub(crate) inner: Arc>>, -} - -#[napi] -impl Connection { - /// Execute a SQL statement and return a Statement handle that - /// streams batches via `fetchNextBatch()`. - #[napi] - pub async fn execute_statement( - &self, - sql: String, - options: ExecuteOptions, - ) -> napi::Result { - let inner = Arc::clone(&self.inner); - guarded(async move { - let guard = inner.lock().await; - let session = guard.as_ref().ok_or_else(|| { - napi::Error::new(napi::Status::InvalidArg, "connection already closed") - })?; - - // Build a per-statement spec on the kernel's mutable - // Statement. Session conf overrides surface through the - // statement_conf overlay; M0 has no parameter binding. - let mut stmt = session.statement(); - stmt.spec().sql(sql); - - let mut overlay: HashMap = - options.session_config.unwrap_or_default(); - if let Some(catalog) = options.initial_catalog { - overlay.insert("default_catalog".to_string(), catalog); - } - if let Some(schema) = options.initial_schema { - overlay.insert("default_schema".to_string(), schema); - } - if !overlay.is_empty() { - stmt.spec().statement_conf(overlay); - } - - let executed = stmt.execute().await.map_err(napi_err_from_kernel)?; - Ok(Statement::from_executed(executed)) - }) - .await - } - - /// Explicit close. Marks the connection wrapper as closed so - /// subsequent calls on this `Connection` return `InvalidArg`, then - /// schedules a fire-and-forget server-side close on the runtime. - /// - /// **Why fire-and-forget and not `Session::close().await`:** the - /// kernel's `Session::close(self).await` body holds a - /// `tracing::EnteredSpan` (a `!Send` type) across an `.await`, so - /// the future is not `Send`. napi-rs's `execute_tokio_future` glue - /// rejects non-`Send` futures, and `Handle::spawn` does too. The - /// kernel's `SessionInner::Drop` already spawns the - /// `delete_session` RPC on the same runtime handle the napi - /// binding captured, so dropping the value is functionally - /// equivalent — the difference is that JS callers can't observe a - /// `delete_session` failure from `close()`. Tracked as a kernel- - /// side follow-up (clone the span rather than entering it) in - /// Round 3 findings. - #[napi] - pub async fn close(&self) -> napi::Result<()> { - let inner = Arc::clone(&self.inner); - guarded(async move { - let _taken = { - let mut guard = inner.lock().await; - guard.take() - }; - // `_taken` drops here. Kernel's `SessionInner::Drop` - // spawns `delete_session` on its captured handle. - Ok(()) - }) - .await - } -} - -impl Drop for Connection { - fn drop(&mut self) { - // Fire-and-forget close on the captured runtime. If `close()` - // was already called, `inner` holds `None` and the spawned - // task is a trivial no-op. - let Some(handle) = runtime::try_get_handle() else { - // No async entry point ever ran — there's nothing to close. - return; - }; - let inner = Arc::clone(&self.inner); - handle.spawn(async move { - // Drop the session value on the runtime. The kernel's - // `SessionInner::Drop` already spawns a fire-and-forget - // `delete_session` against its own captured handle. We do - // NOT call `Session::close().await` here because that - // method holds a `tracing::EnteredSpan` (`!Send`) across - // its body, which would conflict with `Handle::spawn`'s - // `Send` bound on the future. - let _taken = { - let mut guard = inner.lock().await; - guard.take() - }; - // `_taken` drops here; kernel's SessionInner::Drop fires. - }); - } -} diff --git a/native/sea/src/database.rs b/native/sea/src/database.rs deleted file mode 100644 index 7f86760e..00000000 --- a/native/sea/src/database.rs +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! `openSession()` — the binding's session-construction entry point. -//! -//! The kernel collapses ADBC's `Database` + `Connection` into a single -//! `Session`. The TS adapter layer reconstructs a `DBSQLClient` / -//! `Database` wrapper on top of this binding, so the napi surface itself -//! stays flat: one free function, one opaque `Connection` class. -//! -//! Rationale for a free function over a static class method: -//! - napi-rs v2's static-method codegen for async functions returning a -//! `#[napi]` struct is fragile — the runtime registration sometimes -//! omits the method from the class object. Free `#[napi]` functions -//! go through a different, more stable codegen path. -//! - There is no kernel-side `Database` state to wrap; everything -//! meaningful lives on `Session`. A wrapper class with no fields adds -//! a JS object allocation per session for no benefit. - -use std::sync::Arc; -use tokio::sync::Mutex; - -use databricks_sql_kernel::{AuthConfig, Session}; - -use crate::connection::Connection; -use crate::error::napi_err_from_kernel; -use crate::runtime; -use crate::util::guarded; - -/// JS-visible options for opening a Databricks SQL session over PAT. -/// -/// M0 supports PAT only — `token` is required. OAuth M2M / U2M variants -/// land in M1 along with a discriminated-union shape on the JS side. -#[napi(object)] -pub struct ConnectionOptions { - /// Workspace host, e.g. `adb-…azuredatabricks.net`. The kernel - /// normalises this — bare hostnames get `https://` prepended. - pub host_name: String, - /// JDBC-style HTTP path, e.g. `/sql/1.0/warehouses/abc123`. The - /// kernel parses out the warehouse id. - pub http_path: String, - /// Personal access token. Must be non-empty (the kernel rejects - /// empty PATs at session construction). - pub token: String, -} - -/// Open a Databricks SQL session over PAT auth and return an opaque -/// `Connection` wrapping the kernel `Session`. -/// -/// The JS-visible name is `openSession` (napi-rs converts snake_case -/// to camelCase for free functions). -#[napi] -pub async fn open_session(options: ConnectionOptions) -> napi::Result { - guarded(async move { - // Cache the napi-rs tokio Handle on the very first async call - // so Drop impls (which run on the V8 GC thread, outside any - // tokio context) can still `spawn` cleanup tasks onto the - // runtime that's driving this future. - let _ = runtime::get_handle(); - - // SessionConfig is `#[non_exhaustive]` — go through the - // builder, which is the only public path that constructs it. - // `http_path()` is the convenience setter that maps a bare - // hostname + `/sql/1.0/warehouses/{id}` path into the kernel's - // `ConnectionConfig`. - let session = Session::builder() - .http_path(options.host_name, options.http_path) - .auth(AuthConfig::Pat { - token: options.token, - }) - .open() - .await - .map_err(napi_err_from_kernel)?; - Ok(Connection { - inner: Arc::new(Mutex::new(Some(session))), - }) - }) - .await -} diff --git a/native/sea/src/error.rs b/native/sea/src/error.rs deleted file mode 100644 index d06e1600..00000000 --- a/native/sea/src/error.rs +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Kernel-error → `napi::Error` mapping. -//! -//! The kernel returns a richly-typed [`Error`](databricks_sql_kernel::Error) -//! with `code`, `sql_state`, `error_code`, `vendor_code`, `http_status`, -//! `retryable`, and `query_id` fields. The napi `Error` type only -//! carries `status` + `reason` directly — to attach the extra fields -//! as own-properties on the JS error object we'd need an `Env` -//! reference, which `#[napi] async fn` bodies don't have access to -//! cheaply. -//! -//! Compromise (one helper, DRY): encode the structured metadata into -//! the `reason` field as a JSON envelope prefixed with a sentinel -//! `__databricks_error__:` token. The TS adapter detects the sentinel, -//! parses the payload, and reconstructs the typed error class -//! (`DBSQLError`, `AuthError`, …). Plain-string errors from the -//! binding's own code paths fall through the sentinel detection -//! unchanged. -//! -//! Round 3 may switch to the `Env::create_error` + own-properties -//! pattern once we have a stable point in each entry where `env: Env` -//! is available (likely by wrapping the async glue in a sync entry -//! point that calls `tokio::spawn` after capturing `env`). - -use databricks_sql_kernel::{Error as KernelError, ErrorCode}; -use napi::{Error as NapiError, Status}; - -/// Sentinel that tells the TS adapter the `reason` string is a JSON -/// envelope rather than a plain message. Has to be ASCII-only so it -/// survives any `String` round-trip the napi layer might do. -pub(crate) const ERROR_SENTINEL: &str = "__databricks_error__:"; - -/// Map a kernel [`Error`] into a `napi::Error`. Preserves the kernel -/// `ErrorCode` (mapped to the closest napi `Status`), and stuffs the -/// remaining structured fields into a JSON envelope on the reason so -/// the TS layer can reconstruct the typed error class. -pub(crate) fn napi_err_from_kernel(e: KernelError) -> NapiError { - let status = status_from_kernel_code(e.code); - - // Build a minimal JSON envelope. We hand-build it (no serde_json - // dep) — the field set is small and fixed, and avoiding serde - // keeps the crate dep graph trim. - let mut envelope = String::with_capacity(e.message.len() + 128); - envelope.push_str(ERROR_SENTINEL); - envelope.push('{'); - push_json_str_field(&mut envelope, "code", error_code_str(e.code)); - envelope.push(','); - push_json_str_field(&mut envelope, "message", &e.message); - if let Some(s) = &e.sql_state { - envelope.push(','); - push_json_str_field(&mut envelope, "sqlState", s); - } - if let Some(ec) = &e.error_code { - envelope.push(','); - push_json_str_field(&mut envelope, "errorCode", ec); - } - if let Some(vc) = e.vendor_code { - envelope.push(','); - envelope.push_str("\"vendorCode\":"); - envelope.push_str(&vc.to_string()); - } - if let Some(hs) = e.http_status { - envelope.push(','); - envelope.push_str("\"httpStatus\":"); - envelope.push_str(&hs.to_string()); - } - if e.retryable { - envelope.push_str(",\"retryable\":true"); - } - if let Some(qid) = &e.query_id { - envelope.push(','); - push_json_str_field(&mut envelope, "queryId", qid); - } - envelope.push('}'); - - NapiError::new(status, envelope) -} - -/// Map kernel `ErrorCode` → napi `Status`. The status is mostly -/// cosmetic on the napi side (the TS layer dispatches on `code` from -/// the envelope); we pick the closest match so unwrapped errors still -/// look reasonable in raw napi consumers. -fn status_from_kernel_code(code: ErrorCode) -> Status { - match code { - ErrorCode::InvalidArgument | ErrorCode::InvalidStatementHandle => Status::InvalidArg, - ErrorCode::Cancelled => Status::Cancelled, - _ => Status::GenericFailure, - } -} - -/// String tag for each kernel `ErrorCode` — stable across kernel -/// versions because v0's `ErrorCode` is `#[non_exhaustive]` and we -/// pattern-match exhaustively against the known set. -fn error_code_str(code: ErrorCode) -> &'static str { - match code { - ErrorCode::InvalidArgument => "InvalidArgument", - ErrorCode::Unauthenticated => "Unauthenticated", - ErrorCode::PermissionDenied => "PermissionDenied", - ErrorCode::NotFound => "NotFound", - ErrorCode::ResourceExhausted => "ResourceExhausted", - ErrorCode::Unavailable => "Unavailable", - ErrorCode::Timeout => "Timeout", - ErrorCode::Cancelled => "Cancelled", - ErrorCode::DataLoss => "DataLoss", - ErrorCode::Internal => "Internal", - ErrorCode::InvalidStatementHandle => "InvalidStatementHandle", - ErrorCode::NetworkError => "NetworkError", - ErrorCode::SqlError => "SqlError", - // Forward-compat: ErrorCode is `#[non_exhaustive]`. Any new - // variant the kernel adds in v0.x lands here until we mirror - // it in this match. The TS layer treats Unknown as a generic - // failure. - _ => "Unknown", - } -} - -/// Append `"key":"value"` to the JSON buffer, escaping the value's -/// `"` and `\` characters and control chars to keep the envelope -/// JSON-parseable. The narrow set of escapes is sufficient for the -/// human-readable error messages the kernel produces (no embedded -/// binary blobs, no Unicode surrogate pairs). -fn push_json_str_field(out: &mut String, key: &str, value: &str) { - out.push('"'); - out.push_str(key); - out.push_str("\":\""); - for ch in value.chars() { - match ch { - '"' => out.push_str("\\\""), - '\\' => out.push_str("\\\\"), - '\n' => out.push_str("\\n"), - '\r' => out.push_str("\\r"), - '\t' => out.push_str("\\t"), - c if (c as u32) < 0x20 => { - out.push_str(&format!("\\u{:04x}", c as u32)); - } - c => out.push(c), - } - } - out.push('"'); -} diff --git a/native/sea/src/lib.rs b/native/sea/src/lib.rs deleted file mode 100644 index 6de102ea..00000000 --- a/native/sea/src/lib.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! `databricks-sea-native` — napi-rs binding crate for the Databricks -//! SQL Node.js driver's SEA (Statement Execution API) path. -//! -//! Round 2 surface: `Database.open` → `Connection.execute_statement` -//! → `Statement.fetch_next_batch` / `schema` / `cancel` / `close`. -//! Results cross the FFI as Arrow IPC bytes (see `result.rs`); the -//! TS adapter decodes them via `apache-arrow`. - -#![deny(unsafe_op_in_unsafe_fn)] - -#[macro_use] -extern crate napi_derive; - -pub(crate) mod connection; -pub(crate) mod database; -pub(crate) mod error; -pub(crate) mod logger; -pub(crate) mod result; -pub(crate) mod runtime; -pub(crate) mod statement; -pub(crate) mod util; - -/// Returns the native binding's crate version (`CARGO_PKG_VERSION`). -/// -/// Originally the round-1b smoke test; kept as a cheap "is the binding -/// loaded?" probe for the JS-side loader's structured diagnostics. -#[napi] -pub fn version() -> String { - env!("CARGO_PKG_VERSION").to_string() -} diff --git a/native/sea/src/logger.rs b/native/sea/src/logger.rs deleted file mode 100644 index 2bfcd078..00000000 --- a/native/sea/src/logger.rs +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! `tracing` → JS `DBSQLLogger` bridge via `ThreadsafeFunction`. -//! -//! Round 3 work. Empty in Round 1b. diff --git a/native/sea/src/result.rs b/native/sea/src/result.rs deleted file mode 100644 index 488c0851..00000000 --- a/native/sea/src/result.rs +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Arrow IPC payload types crossed across the napi boundary. -//! -//! Per sea-design.md Layer 2: "The binding ships the batch across the -//! FFI as Arrow IPC bytes. The adapter converts those bytes into -//! JavaScript rows…" — so the napi boundary is intentionally narrow: -//! one envelope per batch, one envelope per schema. - -use napi::bindgen_prelude::Buffer; - -/// A single Arrow IPC stream payload encoding one record batch (plus -/// the schema header so the JS-side reader is stateless). -#[napi(object)] -pub struct ArrowBatch { - pub ipc_bytes: Buffer, -} - -/// An Arrow IPC stream payload encoding just the result schema (no -/// record-batch messages). Returned by `Statement.schema()`. -#[napi(object)] -pub struct ArrowSchema { - pub ipc_bytes: Buffer, -} diff --git a/native/sea/src/runtime.rs b/native/sea/src/runtime.rs deleted file mode 100644 index 7f0ee42d..00000000 --- a/native/sea/src/runtime.rs +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Captured tokio `Handle` for napi-rs's process-global runtime. -//! -//! Per the napi-rs patterns doc (pattern #2): the first time any -//! `#[napi] async fn` runs, we are guaranteed to be on napi-rs's tokio -//! runtime. We snapshot the current `Handle` then and stash a clone in -//! a process-static `OnceCell`. Every subsequent kernel construction -//! reads the captured handle and hands a clone to the kernel, so -//! Drop-time cleanup (which runs on the V8 GC thread, *outside* any -//! tokio context) can still `spawn` cleanup tasks onto the same -//! runtime napi-rs is driving. -//! -//! `Handle::current()` MUST NOT be called from a synchronous JS-thread -//! entry point or from module init — both run before napi-rs has -//! constructed its runtime and would panic. `get()` returns `None` in -//! that case so callers can surface a useful error rather than abort. - -use once_cell::sync::OnceCell; -use tokio::runtime::Handle; - -static RUNTIME_HANDLE: OnceCell = OnceCell::new(); - -/// Capture the current tokio runtime handle on first call, return a -/// reference to the captured clone on subsequent calls. -/// -/// MUST be called from inside a `#[napi] async fn` body (or any other -/// tokio runtime context); otherwise `Handle::current()` panics on the -/// very first call. Subsequent calls are infallible and lock-free. -/// -/// Round 1b has no async entry points that exercise this yet; Round 2 -/// will call it from `Database::open()` and other `#[napi] async fn`s. -#[allow(dead_code)] -pub(crate) fn get_handle() -> &'static Handle { - RUNTIME_HANDLE.get_or_init(Handle::current) -} - -/// Non-panicking accessor — returns `None` if `get_handle()` has not -/// been called yet. Drop impls and other GC-thread call sites use this -/// to short-circuit cleanup when no async entry point has ever run -/// (i.e. there is no kernel state that needs closing either). -pub(crate) fn try_get_handle() -> Option<&'static Handle> { - RUNTIME_HANDLE.get() -} diff --git a/native/sea/src/statement.rs b/native/sea/src/statement.rs deleted file mode 100644 index 6d7b8761..00000000 --- a/native/sea/src/statement.rs +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Opaque `Statement` wrapper around the kernel's `ExecutedStatement`. -//! -//! M0 surface (Round 2): -//! - `Statement.fetchNextBatch() -> Option` — drives -//! `ResultStream::next_batch().await`, serialises the borrowed -//! `RecordBatch` to Arrow IPC bytes, returns them to JS. -//! - `Statement.schema() -> ArrowSchema` — returns the cached schema -//! from the kernel side, serialised as a schema-only IPC payload. -//! - `Statement.cancel()` / `Statement.close()` — forwards to -//! `ExecutedStatement::cancel/close` via the -//! `ExecutedStatementHandle` trait. Drop fires-and-forgets close -//! if not already explicitly closed. - -use std::sync::Arc; -use tokio::sync::Mutex; - -use arrow_ipc::writer::StreamWriter; -use databricks_sql_kernel::{ExecutedStatement, ExecutedStatementHandle, ResultBatch}; - -use crate::error::napi_err_from_kernel; -use crate::result::{ArrowBatch, ArrowSchema}; -use crate::runtime; -use crate::util::guarded; - -/// Opaque executed-statement handle. -/// -/// `inner` is wrapped in `Arc>>` so: -/// - `fetch_next_batch` can `await` `ResultStream::next_batch` which -/// requires `&mut ExecutedStatement` (via `result_stream_mut`), -/// - `cancel` / `close` (which take `&self` on the kernel side via the -/// `ExecutedStatementHandle` trait) can run concurrently with each -/// other from a JS perspective without panicking, -/// - `Drop` can hand the inner handle off to a tokio task without -/// touching `&mut self` across an `await`. -#[napi] -pub struct Statement { - inner: Arc>>, -} - -impl Statement { - /// Crate-internal constructor — called from - /// `Connection::execute_statement` once the kernel hands back the - /// `ExecutedStatement`. - pub(crate) fn from_executed(executed: ExecutedStatement) -> Self { - Self { - inner: Arc::new(Mutex::new(Some(executed))), - } - } -} - -#[napi] -impl Statement { - /// Pull the next batch of results. Returns `None` when the stream - /// is exhausted. The returned `ArrowBatch.ipcBytes` is a complete - /// Arrow IPC stream (schema header + 1 record-batch message) - /// suitable for handing to `apache-arrow`'s `RecordBatchReader`. - #[napi] - pub async fn fetch_next_batch(&self) -> napi::Result> { - let inner = Arc::clone(&self.inner); - guarded(async move { - let mut guard = inner.lock().await; - let executed = guard.as_mut().ok_or_else(|| { - napi::Error::new(napi::Status::InvalidArg, "statement already closed") - })?; - - let stream = executed.result_stream_mut(); - // Capture the schema before borrowing the next batch — we - // include the schema header in every IPC payload so the - // JS-side consumer can decode each batch independently - // without carrying state across calls. - let schema = stream.schema(); - let maybe_batch = stream.next_batch().await.map_err(napi_err_from_kernel)?; - let Some(batch) = maybe_batch else { - return Ok(None); - }; - // `ResultBatch` is `#[non_exhaustive]`; v0 only ever - // yields `Arrow`. The error arm exists for forward - // compat — v1+ may add ColumnarThrift / JsonRows / etc., - // and we want the binding to surface that as a typed - // error rather than silently misbehaving. - let record_batch = match batch { - ResultBatch::Arrow(rb) => rb, - _ => { - return Err(napi::Error::new( - napi::Status::GenericFailure, - "non-Arrow ResultBatch variant — binding needs upgrade", - )); - } - }; - let bytes = encode_ipc_stream(&schema, Some(record_batch))?; - Ok(Some(ArrowBatch { - ipc_bytes: bytes.into(), - })) - }) - .await - } - - /// Result schema as an Arrow IPC payload (schema header only, no - /// record-batch message). Available before any batches have been - /// fetched. - #[napi] - pub async fn schema(&self) -> napi::Result { - let inner = Arc::clone(&self.inner); - guarded(async move { - let guard = inner.lock().await; - let executed = guard.as_ref().ok_or_else(|| { - napi::Error::new(napi::Status::InvalidArg, "statement already closed") - })?; - let schema = executed.schema(); - let bytes = encode_ipc_stream(&schema, None)?; - Ok(ArrowSchema { - ipc_bytes: bytes.into(), - }) - }) - .await - } - - /// Server-side cancel. No-op if already finished. - #[napi] - pub async fn cancel(&self) -> napi::Result<()> { - let inner = Arc::clone(&self.inner); - guarded(async move { - let guard = inner.lock().await; - let executed = guard.as_ref().ok_or_else(|| { - napi::Error::new(napi::Status::InvalidArg, "statement already closed") - })?; - executed.cancel().await.map_err(napi_err_from_kernel) - }) - .await - } - - /// Explicit close. Awaits the server-side close so the JS caller - /// can observe failures. - #[napi] - pub async fn close(&self) -> napi::Result<()> { - let inner = Arc::clone(&self.inner); - guarded(async move { - // Take the handle out so `Drop` knows there's nothing left - // to clean up. - let executed = { - let mut guard = inner.lock().await; - guard.take() - }; - if let Some(executed) = executed { - executed.close().await.map_err(napi_err_from_kernel)?; - } - Ok(()) - }) - .await - } -} - -impl Drop for Statement { - fn drop(&mut self) { - let Some(handle) = runtime::try_get_handle() else { - return; - }; - let inner = Arc::clone(&self.inner); - handle.spawn(async move { - // Drop the executed statement on the runtime. The kernel's - // `ExecutedStatement::Drop` already spawns a fire-and-forget - // `close_statement` against its own captured handle, so we - // just need to ensure the value is dropped inside a tokio - // context (the kernel's Drop reads `runtime_handle.clone()` - // and spawns; that handle is the same one we captured here). - let _taken = { - let mut guard = inner.lock().await; - guard.take() - }; - }); - } -} - -/// Encode an Arrow schema (and optional one record batch) as an IPC -/// stream payload. Used for both `schema()` (schema only) and -/// `fetchNextBatch()` (schema + one batch). Returning a self-contained -/// IPC stream per call is wasteful header-wise but lets the JS adapter -/// stay stateless — it decodes each `ipcBytes` independently via the -/// same `apache-arrow` `RecordBatchReader` path. -fn encode_ipc_stream( - schema: &arrow_schema::SchemaRef, - batch: Option<&arrow_array::RecordBatch>, -) -> napi::Result> { - let mut buf: Vec = Vec::new(); - { - let mut writer = StreamWriter::try_new(&mut buf, schema) - .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?; - if let Some(rb) = batch { - writer - .write(rb) - .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?; - } - writer - .finish() - .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?; - } - Ok(buf) -} diff --git a/native/sea/src/util.rs b/native/sea/src/util.rs deleted file mode 100644 index 4ba7e346..00000000 --- a/native/sea/src/util.rs +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2026 Databricks, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Shared helpers — one place for the `catch_unwind` wrapping that -//! every async entry point goes through (pattern #8 in the napi-rs -//! patterns doc). One helper, called once per entry point — DRY. -//! -//! Why a helper rather than a macro: helper + `async move {}` reads -//! better at call sites and keeps the stack trace shallow when a panic -//! actually fires (a macro would expand into the caller's body). - -use std::any::Any; -use std::future::Future; -use std::panic::AssertUnwindSafe; - -use futures::FutureExt; -use napi::{Error as NapiError, Result as NapiResult, Status}; - -/// Run `fut` and convert any panic the future raises into a -/// `napi::Error` so the JS caller sees a rejected promise instead of -/// the Node process aborting. -/// -/// `catch_unwind` does not catch `std::process::abort`, double-panic, -/// or allocator OOM — those still bring down the process. That's by -/// design: a corrupted process state isn't something we can pretend to -/// recover from. -pub(crate) async fn guarded(fut: F) -> NapiResult -where - F: Future>, -{ - match AssertUnwindSafe(fut).catch_unwind().await { - Ok(res) => res, - Err(panic) => Err(NapiError::new( - Status::GenericFailure, - format!("panic in native binding: {}", panic_payload_msg(panic)), - )), - } -} - -/// Best-effort downcast of a panic payload to a human-readable string. -/// `panic!("…")` produces `&'static str` or `String`; the rest fall -/// through to a generic marker so the JS caller still sees *something*. -fn panic_payload_msg(p: Box) -> String { - if let Some(s) = p.downcast_ref::<&'static str>() { - return (*s).to_string(); - } - if let Some(s) = p.downcast_ref::() { - return s.clone(); - } - "non-string panic payload".to_string() -} diff --git a/package.json b/package.json index 14d4d200..a60ca74f 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ "test": "nyc --report-dir=${NYC_REPORT_DIR:-coverage_unit} mocha --config tests/unit/.mocharc.js", "update-version": "node bin/update-version.js && prettier --write ./lib/version.ts", "build": "npm run update-version && tsc --project tsconfig.build.json", - "build:native": "cd native/sea && napi build --platform --release", - "build:native:debug": "cd native/sea && napi build --platform", + "build:native": "bash -c 'cd ${DATABRICKS_SQL_KERNEL_REPO:-../../databricks-sql-kernel-sea-WT/napi-binding}/napi && npx --yes @napi-rs/cli@2 build --release && cp index.node $OLDPWD/native/sea/index.linux-x64-gnu.node && cp index.d.ts $OLDPWD/native/sea/'", + "build:native:debug": "bash -c 'cd ${DATABRICKS_SQL_KERNEL_REPO:-../../databricks-sql-kernel-sea-WT/napi-binding}/napi && npx --yes @napi-rs/cli@2 build && cp index.node $OLDPWD/native/sea/index.linux-x64-gnu.node && cp index.d.ts $OLDPWD/native/sea/'", "watch": "tsc --project tsconfig.build.json --watch", "type-check": "tsc --noEmit", "prettier": "prettier . --check", @@ -93,4 +93,4 @@ "optionalDependencies": { "lz4": "^0.6.5" } -} +} \ No newline at end of file