diff --git a/src/40select.js b/src/40select.js
index e55a49474d..fe95b0ca4c 100755
--- a/src/40select.js
+++ b/src/40select.js
@@ -187,6 +187,7 @@ yy.Select = class Select {
 		query.rownums = [];
 		query.grouprownums = [];
 		query.windowaggrs = []; // For window aggregate functions (COUNT/MAX/MIN/SUM/AVG with OVER)
+		query.windowfns = []; // For positional window functions (LEAD/LAG/FIRST_VALUE/LAST_VALUE)
 
 		// Check if INTO OBJECT() is used - this affects how arrow expressions are compiled
 		if (this.into instanceof yy.FuncValue && this.into.funcid.toUpperCase() === 'OBJECT') {
@@ -509,6 +510,84 @@ yy.Select = class Select {
 				}
 			}
 
+			// Positional window functions (LEAD/LAG/FIRST_VALUE/LAST_VALUE). Each entry of
+			// query.windowfns describes one call: rows are grouped by its PARTITION BY columns,
+			// ordered by its ORDER BY columns, and the result is stored on the row under its alias.
+			if (query.windowfns && query.windowfns.length > 0) {
+				for (var j = 0, jlen = query.windowfns.length; j < jlen; j++) {
+					var wfConfig = query.windowfns[j];
+					// Prototype-less map: a key can never resolve to an inherited Object member
+					var partitions = Object.create(null);
+
+					// Group row indices by partition key. The per-column strings are JSON-encoded as
+					// a list so the tuples ('a|b', 'c') and ('a', 'b|c') cannot share a key.
+					for (var i = 0, ilen = res.length; i < ilen; i++) {
+						var partitionKey =
+							wfConfig.partitionColumns && wfConfig.partitionColumns.length > 0
+								? JSON.stringify(
+										wfConfig.partitionColumns.map(function (col) {
+											var cell = res[i][col];
+											return cell == null ? '' : String(cell);
+										})
+									)
+								: '__all__';
+
+						if (!partitions[partitionKey]) partitions[partitionKey] = [];
+						partitions[partitionKey].push(i);
+					}
+
+					// Process each partition
+					for (var partitionKey in partitions) {
+						var rowIndices = partitions[partitionKey];
+
+						// Sort row indices within partition by ORDER BY columns
+						if (wfConfig.orderColumns && wfConfig.orderColumns.length > 0) {
+							rowIndices.sort(function (a, b) {
+								for (var oi = 0; oi < wfConfig.orderColumns.length; oi++) {
+									var ocol = wfConfig.orderColumns[oi];
+									var va = res[a][ocol.columnid];
+									var vb = res[b][ocol.columnid];
+									if (va == null && vb == null) continue;
+									// NULLs sort first under ASC and last under DESC
+									if (va == null) return ocol.direction === 'ASC' ? -1 : 1;
+									if (vb == null) return ocol.direction === 'ASC' ? 1 : -1;
+									if (va < vb) return ocol.direction === 'ASC' ? -1 : 1;
+									if (va > vb) return ocol.direction === 'ASC' ? 1 : -1;
+								}
+								return 0;
+							});
+						}
+
+						// Compute values for each row in the partition
+						for (var k = 0; k < rowIndices.length; k++) {
+							var idx = rowIndices[k];
+							var colId = wfConfig.expressionColumnId;
+							var value;
+
+							switch (wfConfig.funcid) {
+								case 'LEAD':
+								case 'LAG':
+									// LAG is LEAD with the offset negated. A target outside the partition
+									// (before the start, past the end, NaN, fractional) has no row, so the
+									// lookup is undefined and the default applies instead of a TypeError.
+									var step = wfConfig.funcid === 'LEAD' ? wfConfig.offset : -wfConfig.offset;
+									var targetRow = rowIndices[k + step];
+									value = targetRow === undefined ? wfConfig.defaultValue : res[targetRow][colId];
+									break;
+								case 'FIRST_VALUE':
+									value = res[rowIndices[0]][colId];
+									break;
+								case 'LAST_VALUE':
+									value = res[rowIndices[rowIndices.length - 1]][colId];
+									break;
+							}
+
+							res[idx][wfConfig.as] = value;
+						}
+					}
+				}
+			}
+
 			var res2 = modify(query, res);
 
 			if (cb) {
diff --git a/src/47over.js b/src/47over.js
index d4fb22e4cd..65a161aa71 100755
--- a/src/47over.js
+++ b/src/47over.js
@@ -24,3 +24,74 @@ yy.Over = class Over {
 		return s;
 	}
 };
+
+/**
+ * AST node for LEAD, LAG, FIRST_VALUE and LAST_VALUE calls. toJS() only emits a
+ * placeholder; findAggregator() records what is needed to fill the column in later.
+ */
+yy.PositionalWindowFunc = class PositionalWindowFunc {
+	/** @param {object} params - fields copied onto the node (funcid, args, over, ...) */
+	constructor(params) {
+		Object.assign(this, params);
+	}
+
+	/** @returns {string} the call rendered as SQL-like text */
+	toString() {
+		let s = this.funcid + '(';
+		if (this.args && this.args.length) {
+			s += this.args.map(a => a.toString()).join(',');
+		}
+		s += ')';
+		if (this.over) s += ' ' + this.over.toString();
+		return s;
+	}
+
+	/**
+	 * Registers a descriptor of this call on query.windowfns.
+	 *
+	 * Offset and default are read as literals only. A negative number arrives as a unary
+	 * minus node (op '-' with a right operand) and is accepted for both arguments.
+	 *
+	 * @param {object} query - object whose windowfns array receives the descriptor
+	 */
+	findAggregator(query) {
+		const args = this.args || [];
+		const literalOf = node => {
+			if (!node) return undefined;
+			if (node.value != null) return node.value;
+			if (node.op === '-' && node.right) return -node.right.value;
+			return undefined;
+		};
+		const offset = literalOf(args[1]);
+		const defaultValue = literalOf(args[2]);
+
+		query.windowfns.push({
+			funcid: this.funcid,
+			as: this.as,
+			expressionColumnId: args[0] ? args[0].columnid : null,
+			// An omitted offset means the adjacent row
+			offset: args[1] ? offset : 1,
+			// An omitted or non-literal default means NULL
+			defaultValue: defaultValue === undefined ? null : defaultValue,
+			partitionColumns:
+				this.over && this.over.partition
+					? this.over.partition.map(p => p.columnid || p.toString())
+					: [],
+			orderColumns:
+				this.over && this.over.order
+					? this.over.order.map(o => ({
+							columnid:
+								o.expression && o.expression.columnid
+									? o.expression.columnid
+									: o.columnid || o.toString(),
+							direction: o.direction || 'ASC',
+						}))
+					: [],
+		});
+	}
+
+	/** @returns {string} the text 'undefined'; the value is assigned from query.windowfns */
+	toJS() {
+		return 'undefined';
+	}
+};
diff --git a/src/alasqlparser.jison b/src/alasqlparser.jison
index a0b4e895b6..7033021a22 100755
--- a/src/alasqlparser.jison
+++ b/src/alasqlparser.jison
@@ -1544,8 +1544,11 @@ FuncValue
 		{
 			var funcid = $1;
 			var exprlist = $4;
-			if(exprlist.length > 1 && (funcid.toUpperCase() == 'MIN' || funcid.toUpperCase() == 'MAX')) {
+			var fidU = funcid.toUpperCase();
+			if(exprlist.length > 1 && (fidU == 'MIN' || fidU == 'MAX')) {
 					$$ = new yy.FuncValue({funcid: funcid, args: exprlist, over: $6});
+			} else if(fidU == 'LEAD' || fidU == 'LAG' || fidU == 'FIRST_VALUE' || fidU == 'LAST_VALUE') {
+					$$ = new yy.PositionalWindowFunc({funcid: fidU, args: exprlist, over: $6});
 			} else if(alasql.aggr[$1]) {
 				$$ = new yy.AggrValue({aggregatorid: 'REDUCE',
 					funcid: funcid, expression: exprlist[0], args: exprlist, distinct:($3=='DISTINCT'), over: $6 });
diff --git a/src/alasqlparser.js b/src/alasqlparser.js
index 62071b9f8b..d0d3624c06 100755
--- a/src/alasqlparser.js
+++ b/src/alasqlparser.js
@@ -945,8 +945,11 @@ case 367:
 
 			var funcid = $$[$0-5];
 			var exprlist = $$[$0-2];
-			if(exprlist.length > 1 && (funcid.toUpperCase() == 'MIN' || funcid.toUpperCase() == 'MAX')) {
+			var fidU = funcid.toUpperCase();
+			if(exprlist.length > 1 && (fidU == 'MIN' || fidU == 'MAX')) {
 					this.$ = new yy.FuncValue({funcid: funcid, args: exprlist, over: $$[$0]});
+			} else if(fidU == 'LEAD' || fidU == 'LAG' || fidU == 'FIRST_VALUE' || fidU == 'LAST_VALUE') {
+					this.$ = new yy.PositionalWindowFunc({funcid: fidU, args: exprlist, over: $$[$0]});
 			} else if(alasql.aggr[$$[$0-5]]) {
 				this.$ = new yy.AggrValue({aggregatorid: 'REDUCE',
 					funcid: funcid, expression: exprlist[0], args: exprlist, distinct:($$[$0-3]=='DISTINCT'), over: $$[$0] });
diff --git a/test/test2409.js b/test/test2409.js
new file mode 100644
index 0000000000..093fd537cf
--- /dev/null
+++ b/test/test2409.js
@@ -0,0 +1,304 @@
+if (typeof exports === 'object') {
+	var assert = require('assert');
+	var alasql = require('..');
+}
+
+describe('Test 2409 - LEAD/LAG/FIRST_VALUE/LAST_VALUE Window Functions', function () {
+	var data = [
+		{dept: 'Sales', emp: 'Alice', salary: 1000},
+		{dept: 'Sales', emp: 'Bob', salary: 1200},
+		{dept: 'Sales', emp: 'Carol', salary: 1500},
+		{dept: 'IT', emp: 'Dave', salary: 2000},
+		{dept: 'IT', emp: 'Eve', salary: 2500},
+	];
+
+	describe('LEAD', function () {
+		it('returns the next row value with default offset', function () {
+			var res = alasql(
+				'SELECT emp, salary, LEAD(salary) OVER (ORDER BY salary) AS next_salary FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, next_salary: 1200},
+				{emp: 'Bob', salary: 1200, next_salary: 1500},
+				{emp: 'Carol', salary: 1500, next_salary: 2000},
+				{emp: 'Dave', salary: 2000, next_salary: 2500},
+				{emp: 'Eve', salary: 2500, next_salary: null},
+			]);
+		});
+
+		it('honours an explicit offset', function () {
+			var res = alasql(
+				'SELECT emp, salary, LEAD(salary, 2) OVER (ORDER BY salary) AS next2_salary FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, next2_salary: 1500},
+				{emp: 'Bob', salary: 1200, next2_salary: 2000},
+				{emp: 'Carol', salary: 1500, next2_salary: 2500},
+				{emp: 'Dave', salary: 2000, next2_salary: null},
+				{emp: 'Eve', salary: 2500, next2_salary: null},
+			]);
+		});
+
+		it('uses a custom default when no row is ahead', function () {
+			var res = alasql(
+				'SELECT emp, salary, LEAD(salary, 1, 0) OVER (ORDER BY salary) AS next_salary FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, next_salary: 1200},
+				{emp: 'Bob', salary: 1200, next_salary: 1500},
+				{emp: 'Carol', salary: 1500, next_salary: 2000},
+				{emp: 'Dave', salary: 2000, next_salary: 2500},
+				{emp: 'Eve', salary: 2500, next_salary: 0},
+			]);
+		});
+
+		it('restarts within each PARTITION BY group', function () {
+			var res = alasql(
+				'SELECT dept, emp, salary, LEAD(salary) OVER (PARTITION BY dept ORDER BY salary) AS next_salary FROM ? ORDER BY dept, salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{dept: 'IT', emp: 'Dave', salary: 2000, next_salary: 2500},
+				{dept: 'IT', emp: 'Eve', salary: 2500, next_salary: null},
+				{dept: 'Sales', emp: 'Alice', salary: 1000, next_salary: 1200},
+				{dept: 'Sales', emp: 'Bob', salary: 1200, next_salary: 1500},
+				{dept: 'Sales', emp: 'Carol', salary: 1500, next_salary: null},
+			]);
+		});
+
+		it('walks DESC ordering correctly', function () {
+			var res = alasql(
+				'SELECT emp, salary, LEAD(salary) OVER (ORDER BY salary DESC) AS next_sal FROM ? ORDER BY salary DESC',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Eve', salary: 2500, next_sal: 2000},
+				{emp: 'Dave', salary: 2000, next_sal: 1500},
+				{emp: 'Carol', salary: 1500, next_sal: 1200},
+				{emp: 'Bob', salary: 1200, next_sal: 1000},
+				{emp: 'Alice', salary: 1000, next_sal: null},
+			]);
+		});
+
+		it('returns the default when offset exceeds partition size', function () {
+			var smallData = [
+				{id: 1, val: 100},
+				{id: 2, val: 200},
+			];
+			var res = alasql(
+				'SELECT id, val, LEAD(val, 5) OVER (ORDER BY id) AS far_ahead FROM ? ORDER BY id',
+				[smallData]
+			);
+			assert.deepStrictEqual(res, [
+				{id: 1, val: 100, far_ahead: null},
+				{id: 2, val: 200, far_ahead: null},
+			]);
+		});
+
+		it('keeps partition values that shadow Object.prototype members apart', function () {
+			var trickyData = [
+				{dept: 'constructor', val: 1},
+				{dept: 'constructor', val: 2},
+				{dept: 'toString', val: 3},
+			];
+			var res = alasql(
+				'SELECT dept, val, LEAD(val) OVER (PARTITION BY dept ORDER BY val) AS next_val FROM ? ORDER BY val',
+				[trickyData]
+			);
+			assert.deepStrictEqual(res, [
+				{dept: 'constructor', val: 1, next_val: 2},
+				{dept: 'constructor', val: 2, next_val: null},
+				{dept: 'toString', val: 3, next_val: null},
+			]);
+		});
+	});
+
+	describe('LAG', function () {
+		it('returns the previous row value with default offset', function () {
+			var res = alasql(
+				'SELECT emp, salary, LAG(salary) OVER (ORDER BY salary) AS prev_salary FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, prev_salary: null},
+				{emp: 'Bob', salary: 1200, prev_salary: 1000},
+				{emp: 'Carol', salary: 1500, prev_salary: 1200},
+				{emp: 'Dave', salary: 2000, prev_salary: 1500},
+				{emp: 'Eve', salary: 2500, prev_salary: 2000},
+			]);
+		});
+
+		it('honours offset and default value (including negatives)', function () {
+			var res = alasql(
+				'SELECT emp, salary, LAG(salary, 2, -1) OVER (ORDER BY salary) AS prev2_salary FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, prev2_salary: -1},
+				{emp: 'Bob', salary: 1200, prev2_salary: -1},
+				{emp: 'Carol', salary: 1500, prev2_salary: 1000},
+				{emp: 'Dave', salary: 2000, prev2_salary: 1200},
+				{emp: 'Eve', salary: 2500, prev2_salary: 1500},
+			]);
+		});
+
+		it('restarts within each PARTITION BY group', function () {
+			var res = alasql(
+				'SELECT dept, emp, salary, LAG(salary) OVER (PARTITION BY dept ORDER BY salary) AS prev_salary FROM ? ORDER BY dept, salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{dept: 'IT', emp: 'Dave', salary: 2000, prev_salary: null},
+				{dept: 'IT', emp: 'Eve', salary: 2500, prev_salary: 2000},
+				{dept: 'Sales', emp: 'Alice', salary: 1000, prev_salary: null},
+				{dept: 'Sales', emp: 'Bob', salary: 1200, prev_salary: 1000},
+				{dept: 'Sales', emp: 'Carol', salary: 1500, prev_salary: 1200},
+			]);
+		});
+
+		it('returns null when the source value itself is null', function () {
+			var dataWithNulls = [
+				{id: 1, val: 10},
+				{id: 2, val: null},
+				{id: 3, val: 30},
+			];
+			var res = alasql(
+				'SELECT id, val, LAG(val) OVER (ORDER BY id) AS prev_val FROM ? ORDER BY id',
+				[dataWithNulls]
+			);
+			assert.deepStrictEqual(res, [
+				{id: 1, val: 10, prev_val: null},
+				{id: 2, val: null, prev_val: 10},
+				{id: 3, val: 30, prev_val: null},
+			]);
+		});
+
+		it('treats a negative offset as a look in the opposite direction', function () {
+			var res = alasql(
+				'SELECT emp, salary, LAG(salary, -1) OVER (ORDER BY salary) AS next_salary FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, next_salary: 1200},
+				{emp: 'Bob', salary: 1200, next_salary: 1500},
+				{emp: 'Carol', salary: 1500, next_salary: 2000},
+				{emp: 'Dave', salary: 2000, next_salary: 2500},
+				{emp: 'Eve', salary: 2500, next_salary: null},
+			]);
+		});
+	});
+
+	describe('FIRST_VALUE', function () {
+		it('returns the partition-wide minimum-by-order value', function () {
+			var res = alasql(
+				'SELECT emp, salary, FIRST_VALUE(salary) OVER (ORDER BY salary) AS first_sal FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, first_sal: 1000},
+				{emp: 'Bob', salary: 1200, first_sal: 1000},
+				{emp: 'Carol', salary: 1500, first_sal: 1000},
+				{emp: 'Dave', salary: 2000, first_sal: 1000},
+				{emp: 'Eve', salary: 2500, first_sal: 1000},
+			]);
+		});
+
+		it('restarts within each PARTITION BY group', function () {
+			var res = alasql(
+				'SELECT dept, emp, salary, FIRST_VALUE(salary) OVER (PARTITION BY dept ORDER BY salary) AS first_sal FROM ? ORDER BY dept, salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{dept: 'IT', emp: 'Dave', salary: 2000, first_sal: 2000},
+				{dept: 'IT', emp: 'Eve', salary: 2500, first_sal: 2000},
+				{dept: 'Sales', emp: 'Alice', salary: 1000, first_sal: 1000},
+				{dept: 'Sales', emp: 'Bob', salary: 1200, first_sal: 1000},
+				{dept: 'Sales', emp: 'Carol', salary: 1500, first_sal: 1000},
+			]);
+		});
+
+		it('works on non-numeric columns', function () {
+			var res = alasql(
+				'SELECT dept, emp, FIRST_VALUE(emp) OVER (PARTITION BY dept ORDER BY salary) AS first_emp FROM ? ORDER BY dept, salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{dept: 'IT', emp: 'Dave', first_emp: 'Dave'},
+				{dept: 'IT', emp: 'Eve', first_emp: 'Dave'},
+				{dept: 'Sales', emp: 'Alice', first_emp: 'Alice'},
+				{dept: 'Sales', emp: 'Bob', first_emp: 'Alice'},
+				{dept: 'Sales', emp: 'Carol', first_emp: 'Alice'},
+			]);
+		});
+	});
+
+	describe('LAST_VALUE', function () {
+		it('returns the partition-wide maximum-by-order value', function () {
+			var res = alasql(
+				'SELECT emp, salary, LAST_VALUE(salary) OVER (ORDER BY salary) AS last_sal FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{emp: 'Alice', salary: 1000, last_sal: 2500},
+				{emp: 'Bob', salary: 1200, last_sal: 2500},
+				{emp: 'Carol', salary: 1500, last_sal: 2500},
+				{emp: 'Dave', salary: 2000, last_sal: 2500},
+				{emp: 'Eve', salary: 2500, last_sal: 2500},
+			]);
+		});
+
+		it('restarts within each PARTITION BY group', function () {
+			var res = alasql(
+				'SELECT dept, emp, salary, LAST_VALUE(salary) OVER (PARTITION BY dept ORDER BY salary) AS last_sal FROM ? ORDER BY dept, salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{dept: 'IT', emp: 'Dave', salary: 2000, last_sal: 2500},
+				{dept: 'IT', emp: 'Eve', salary: 2500, last_sal: 2500},
+				{dept: 'Sales', emp: 'Alice', salary: 1000, last_sal: 1500},
+				{dept: 'Sales', emp: 'Bob', salary: 1200, last_sal: 1500},
+				{dept: 'Sales', emp: 'Carol', salary: 1500, last_sal: 1500},
+			]);
+		});
+	});
+
+	describe('Combined window functions', function () {
+		it('evaluates LEAD/LAG/FIRST_VALUE/LAST_VALUE in a single query', function () {
+			var res = alasql(
+				'SELECT emp, salary, LEAD(salary) OVER (ORDER BY salary) AS next_sal, LAG(salary) OVER (ORDER BY salary) AS prev_sal, FIRST_VALUE(salary) OVER (ORDER BY salary) AS first_sal, LAST_VALUE(salary) OVER (ORDER BY salary) AS last_sal FROM ? ORDER BY salary',
+				[data]
+			);
+			assert.deepStrictEqual(res, [
+				{
+					emp: 'Alice',
+					salary: 1000,
+					next_sal: 1200,
+					prev_sal: null,
+					first_sal: 1000,
+					last_sal: 2500,
+				},
+				{emp: 'Bob', salary: 1200, next_sal: 1500, prev_sal: 1000, first_sal: 1000, last_sal: 2500},
+				{
+					emp: 'Carol',
+					salary: 1500,
+					next_sal: 2000,
+					prev_sal: 1200,
+					first_sal: 1000,
+					last_sal: 2500,
+				},
+				{
+					emp: 'Dave',
+					salary: 2000,
+					next_sal: 2500,
+					prev_sal: 1500,
+					first_sal: 1000,
+					last_sal: 2500,
+				},
+				{emp: 'Eve', salary: 2500, next_sal: null, prev_sal: 2000, first_sal: 1000, last_sal: 2500},
+			]);
+		});
+	});
+});