From 3784b61652f1dacaf995863720594e20d1ebe5e6 Mon Sep 17 00:00:00 2001 From: Caolan McMahon Date: Tue, 20 Jan 2026 16:11:13 +0000 Subject: [PATCH 1/2] Use relative pointers in block encoding This patch rewrites all DeltaOps to use `seq` values relative to the `seq` number of the enclosing block. This takes advantage of variable width integer encoding to reduce block size when pointers reference nearby blocks (a common case). --- lib/encoding.js | 59 ++++++++++++++++++++++++++++++++++-- lib/write.js | 2 +- spec/hyperschema/index.js | 18 +++++------ spec/hyperschema/schema.json | 6 ++-- 4 files changed, 70 insertions(+), 15 deletions(-) diff --git a/lib/encoding.js b/lib/encoding.js index 23b99fe..02a9feb 100644 --- a/lib/encoding.js +++ b/lib/encoding.js @@ -15,8 +15,9 @@ exports.Block = Block1 exports.encodeBlock = encodeBlock exports.decodeBlock = decodeBlock -function encodeBlock(block) { +function encodeBlock(block, seq) { if (block.type === 1) { + block = makeDeltasRelative(block, seq) return c.encode(Block1, block) } @@ -38,7 +39,9 @@ function decodeBlock(buffer, seq) { state.start = 0 if (type === 1) { - return Block1.decode(state) + let block = Block1.decode(state) + block = makeDeltasAbsolute(block, seq) + return block } if (type === 0) { @@ -142,3 +145,55 @@ function toCohort(seq, pointers, cohorts) { } ] } + +function makeDeltasRelative(block, seq) { + if (block.tree) { + for (const tree of block.tree) { + // The deltas themselves are not owned by the block at this + // point and need to be copied on write. + tree.keys = tree.keys.map((d) => toRelativeDelta(seq, d)) + tree.children = tree.children.map((d) => toRelativeDelta(seq, d)) + } + } + if (block.cohorts) { + // cohorts array is owned by block (see write.js prepareCohorts) + // and can be mutated + for (let i = 0; i < block.cohorts.length; i++) { + // but the deltas array in each cohort is not owned by block + block.cohorts[i] = block.cohorts[i].map((d) => toRelativeDelta(seq, d)) + } + } + return block +} + +function makeDeltasAbsolute(block, seq) { + // This block is still owned by the encoding module at this point and + // everything is safe to mutate. + if (block.tree) { + for (const tree of block.tree) { + for (const d of tree.keys) toAbsoluteDelta(seq, d) + for (const d of tree.children) toAbsoluteDelta(seq, d) + } + } + if (block.cohorts) { + for (const cohort of block.cohorts) { + for (const d of cohort) toAbsoluteDelta(seq, d) + } + } + return block +} + +function toRelativeDelta(seq, delta) { + if (!delta.pointer) { + return delta + } + const pointer = { ...delta.pointer, seq: delta.pointer.seq - seq } + return { ...delta, pointer } +} + +function toAbsoluteDelta(seq, delta) { + if (delta.pointer) { + delta.pointer.seq += seq + } + return delta +} diff --git a/lib/write.js b/lib/write.js index 6528ed8..9932a8d 100644 --- a/lib/write.js +++ b/lib/write.js @@ -497,7 +497,7 @@ module.exports = class WriteBatch { for (let i = 0; i < blocks.length; i++) { blocks[i].checkpoint = context.checkpoint - buffers[i] = encodeBlock(blocks[i]) + buffers[i] = encodeBlock(blocks[i], context.core.length + i) } if (this.closed) { diff --git a/spec/hyperschema/index.js b/spec/hyperschema/index.js index 630a69e..3faefdc 100644 --- a/spec/hyperschema/index.js +++ b/spec/hyperschema/index.js @@ -36,13 +36,13 @@ const encoding0 = { } } -// @bee/tree-pointer +// @bee/relative-tree-pointer const encoding1 = { preencode(state, m) { state.end++ // flags are fixed size if (m.core) c.uint.preencode(state, m.core) - if (m.seq) c.uint.preencode(state, m.seq) + if (m.seq) c.int.preencode(state, m.seq) if (m.offset) c.uint.preencode(state, m.offset) }, encode(state, m) { @@ -51,7 +51,7 @@ const encoding1 = { c.uint8.encode(state, flags) if (m.core) c.uint.encode(state, m.core) - if (m.seq) c.uint.encode(state, m.seq) + if (m.seq) c.int.encode(state, m.seq) if (m.offset) c.uint.encode(state, m.offset) }, decode(state) { @@ -59,22 +59,22 @@ const encoding1 = { return { core: (flags & 1) !== 0 ? c.uint.decode(state) : 0, - seq: (flags & 2) !== 0 ? c.uint.decode(state) : 0, + seq: (flags & 2) !== 0 ? c.int.decode(state) : 0, offset: (flags & 4) !== 0 ? c.uint.decode(state) : 0 } } } -// @bee/tree-pointer (inline) +// @bee/relative-tree-pointer (inline) const encoding1_inline = { preencode(state, m) { if (m.core) c.uint.preencode(state, m.core) - if (m.seq) c.uint.preencode(state, m.seq) + if (m.seq) c.int.preencode(state, m.seq) if (m.offset) c.uint.preencode(state, m.offset) }, encode(state, m) { if (m.core) c.uint.encode(state, m.core) - if (m.seq) c.uint.encode(state, m.seq) + if (m.seq) c.int.encode(state, m.seq) if (m.offset) c.uint.encode(state, m.offset) }, decode(state, inlining) { @@ -82,7 +82,7 @@ const encoding1_inline = { return { core: (flags & 1) !== 0 ? c.uint.decode(state) : 0, - seq: (flags & 2) !== 0 ? c.uint.decode(state) : 0, + seq: (flags & 2) !== 0 ? c.int.decode(state) : 0, offset: (flags & 4) !== 0 ? c.uint.decode(state) : 0 } } @@ -527,7 +527,7 @@ function getEncoding(name) { switch (name) { case '@bee/tree-pointer-0': return encoding0 - case '@bee/tree-pointer': + case '@bee/relative-tree-pointer': return encoding1 case '@bee/tree-delta': return encoding2 diff --git a/spec/hyperschema/schema.json b/spec/hyperschema/schema.json index 5207ec0..ec8e93d 100644 --- a/spec/hyperschema/schema.json +++ b/spec/hyperschema/schema.json @@ -28,7 +28,7 @@ ] }, { - "name": "tree-pointer", + "name": "relative-tree-pointer", "namespace": "bee", "compact": true, "flagsPosition": 0, @@ -40,7 +40,7 @@ }, { "name": "seq", - "type": "uint", + "type": "int", "version": 1 }, { @@ -69,7 +69,7 @@ { "name": "pointer", "inline": true, - "type": "@bee/tree-pointer", + "type": "@bee/relative-tree-pointer", "version": 1 } ] From b727bca36c8e12675b9c68b2417a70be73bf22ce Mon Sep 17 00:00:00 2001 From: Caolan McMahon Date: Mon, 2 Feb 2026 13:05:36 +0000 Subject: [PATCH 2/2] Limit relative pointers to core=0 to enable uint --- lib/encoding.js | 8 ++-- spec/hyperschema/index.js | 12 +++--- spec/hyperschema/schema.json | 2 +- test/all.js | 1 + test/compression.js | 83 ++++++++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 11 deletions(-) create mode 100644 test/compression.js diff --git a/lib/encoding.js b/lib/encoding.js index 02a9feb..3080f14 100644 --- a/lib/encoding.js +++ b/lib/encoding.js @@ -184,16 +184,16 @@ function makeDeltasAbsolute(block, seq) { } function toRelativeDelta(seq, delta) { - if (!delta.pointer) { + if (!delta.pointer || delta.pointer.core !== 0) { return delta } - const pointer = { ...delta.pointer, seq: delta.pointer.seq - seq } + const pointer = { ...delta.pointer, seq: seq - delta.pointer.seq } return { ...delta, pointer } } function toAbsoluteDelta(seq, delta) { - if (delta.pointer) { - delta.pointer.seq += seq + if (delta.pointer && delta.pointer.core === 0) { + delta.pointer.seq = seq - delta.pointer.seq } return delta } diff --git a/spec/hyperschema/index.js b/spec/hyperschema/index.js index 3faefdc..d69e8d9 100644 --- a/spec/hyperschema/index.js +++ b/spec/hyperschema/index.js @@ -42,7 +42,7 @@ const encoding1 = { state.end++ // flags are fixed size if (m.core) c.uint.preencode(state, m.core) - if (m.seq) c.int.preencode(state, m.seq) + if (m.seq) c.uint.preencode(state, m.seq) if (m.offset) c.uint.preencode(state, m.offset) }, encode(state, m) { @@ -51,7 +51,7 @@ const encoding1 = { c.uint8.encode(state, flags) if (m.core) c.uint.encode(state, m.core) - if (m.seq) c.int.encode(state, m.seq) + if (m.seq) c.uint.encode(state, m.seq) if (m.offset) c.uint.encode(state, m.offset) }, decode(state) { @@ -59,7 +59,7 @@ const encoding1 = { return { core: (flags & 1) !== 0 ? c.uint.decode(state) : 0, - seq: (flags & 2) !== 0 ? c.int.decode(state) : 0, + seq: (flags & 2) !== 0 ? c.uint.decode(state) : 0, offset: (flags & 4) !== 0 ? c.uint.decode(state) : 0 } } @@ -69,12 +69,12 @@ const encoding1 = { const encoding1_inline = { preencode(state, m) { if (m.core) c.uint.preencode(state, m.core) - if (m.seq) c.int.preencode(state, m.seq) + if (m.seq) c.uint.preencode(state, m.seq) if (m.offset) c.uint.preencode(state, m.offset) }, encode(state, m) { if (m.core) c.uint.encode(state, m.core) - if (m.seq) c.int.encode(state, m.seq) + if (m.seq) c.uint.encode(state, m.seq) if (m.offset) c.uint.encode(state, m.offset) }, decode(state, inlining) { @@ -82,7 +82,7 @@ const encoding1_inline = { return { core: (flags & 1) !== 0 ? c.uint.decode(state) : 0, - seq: (flags & 2) !== 0 ? c.int.decode(state) : 0, + seq: (flags & 2) !== 0 ? c.uint.decode(state) : 0, offset: (flags & 4) !== 0 ? c.uint.decode(state) : 0 } } diff --git a/spec/hyperschema/schema.json b/spec/hyperschema/schema.json index ec8e93d..bbfa114 100644 --- a/spec/hyperschema/schema.json +++ b/spec/hyperschema/schema.json @@ -40,7 +40,7 @@ }, { "name": "seq", - "type": "int", + "type": "uint", "version": 1 }, { diff --git a/test/all.js b/test/all.js index 751970b..194e25d 100644 --- a/test/all.js +++ b/test/all.js @@ -11,6 +11,7 @@ async function runTests() { await import('./diff.js') await import('./fuzz.js') await import('./undo.js') + await import('./compression.js') test.resume() } diff --git a/test/compression.js b/test/compression.js new file mode 100644 index 0000000..cfc1731 --- /dev/null +++ b/test/compression.js @@ -0,0 +1,83 @@ +const test = require('brittle') +const b4a = require('b4a') +const Bee = require('../') +const Corestore = require('corestore') + +test('multi writer relative pointers', async function (t) { + const path = await t.tmp() + let store = new Corestore(path) + let a = new Bee(store.namespace('a')) + let b = new Bee(store.namespace('b')) + + t.teardown(() => a.close()) + t.teardown(() => b.close()) + t.teardown(() => store.close()) + + await a.ready() + await b.ready() + + async function reopen() { + await a.close() + await b.close() + await store.close() + store = new Corestore(path) + a = new Bee(store.namespace('a')) + b = new Bee(store.namespace('b')) + await a.ready() + await b.ready() + } + + // Relative pointer for first entry is equivalent to absolute pointer + { + const k = b4a.from('1') + const w = a.write() + + w.tryPut(k, k) + await w.flush() + } + + // Introduce relative pointers that differ to the absolute pointers + { + const k = b4a.from('3') + const w = a.write() + + w.tryPut(k, k) + await w.flush() + } + { + const k = b4a.from('4') + const w = a.write() + + w.tryPut(k, k) + await w.flush() + } + + // close and re-open dbs + await reopen() + + // Build on top of remote hyperbee that has used a relative pointer + { + const k = b4a.from('2') + const w = b.write({ key: a.core.key, length: a.core.length }) + + w.tryPut(k, k) + await w.flush() + } + + // close and re-open dbs + await reopen() + + async function getKeys(hb) { + const keys = [] + for await (const data of hb.createReadStream()) { + keys.push(data.key) + } + return keys + } + + t.alike(await getKeys(a), [b4a.from('1'), b4a.from('3'), b4a.from('4')]) + + t.alike(await getKeys(b), [b4a.from('1'), b4a.from('2'), b4a.from('3'), b4a.from('4')]) + + t.pass('finished') +})