From bafc737be31af50e3aaf14a063f0f0063f0162c6 Mon Sep 17 00:00:00 2001 From: Peter H Date: Thu, 8 Feb 2018 20:33:33 -0500 Subject: [PATCH 1/4] Added support for double quotes for a term --- README.md | 5 ++++- index.js | 58 +++++++++++++++++++++++++++++++++++++++++++++++++--- test/test.js | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0243fff..c7ec142 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,16 @@ This function converts a boolean query to a 2 dimensional array with all possibi Input | Output -------- | --------- `a AND b` | `[[a, b]]` +`a AND b` | `[[a, b]]` `a OR b` | `[[a], [b]]` `a AND b AND c` | `[[a, b, c]]` `a AND b OR c` | `[[a, b], [c]]` `a AND (b OR c)` | `[[a, b], [a, c]]` `a AND (b OR c) AND (d OR e)` | `[[a, b, d], [a, b, e], [a, c, d], [a, c, e]]` -Whereas `a`, `b` and `c` represent words, forming a complex query pattern. +Whereas `a`, `b` and `c` represent terms, forming a complex query pattern. + +A term can be written as a single word, or a phrase can be represented by using double quotes to wrap multiple words (e.g. term `a` could be `"foo bar"`). This function works recursively trough all brackets and generates an array of all possible combinations of a matching query. diff --git a/index.js b/index.js index 162c77e..7197728 100644 --- a/index.js +++ b/index.js @@ -24,6 +24,56 @@ function _arraysAreEqual(arrA, arrB) { return true; } +function parseBooleanQuery(searchPhrase) { + + searchPhrase = escapeCharactersInQuotes(searchPhrase); + + var permutations = _parseBooleanQuery(searchPhrase); + + permutations = unescapeCharactersInQuotes(permutations); + + return permutations; +} + +function escapeCharactersInQuotes(searchPhrase){ + searchPhrase = searchPhrase.replace(/(".+?")/g, function(match, group1, offset, input_string) { + // remove spaces + var encoded = encodeURI(group1.trim()); + // remove parenthesis + encoded = encoded.replace(/\(/g, '('); + encoded = encoded.replace(/\)/g, ')'); + return encoded; + }); + return searchPhrase; +} + +function unescapeCharactersInQuotes(permutations){ + var decodedPermutations = []; + var termSet = new Set(); + + permutations.forEach(function(element){ + var decodedElement = []; + element.forEach(function(term){ + // restore parenthesis that may have been encoded + var decoded = term.replace(/(/g, '('); + decoded = decoded.replace(/)/g, ')'); + // restore spaces that may have been encoded + decoded = decodeURI(decoded); + // strip leading NOT for termSet + var searchTerm = decoded; + + // strip off quotes + decoded = decoded.replace(/^"(.*)"$/, function(match, group1, offset, original){ + return group1; + }); + decodedElement.push(decoded); + termSet.add(searchTerm); + }); + decodedPermutations.push(decodedElement); + }); + return decodedPermutations; +} + // This function converts a boolean query to a 2 dimensional array. // a AND (b OR c) // Becomes: @@ -34,7 +84,7 @@ function _arraysAreEqual(arrA, arrB) { // There are more efficient ways to match content to this query, though this is // the one that is most easy to maintain and limits risk of side-effects. // Especially when considering recursively nested queries. -function parseBooleanQuery(searchPhrase) { +function _parseBooleanQuery(searchPhrase) { // Remove outer brackets if they exist. EX: (a OR b) -> a OR b searchPhrase = removeOuterBrackets(searchPhrase); @@ -68,7 +118,7 @@ function parseBooleanQuery(searchPhrase) { // If the string contains brackets, parse it recursively, and add it to // `nestedPaths`. if (containsBrackets(ands[i])) { - nestedPaths.push(parseBooleanQuery(ands[i])); + nestedPaths.push(_parseBooleanQuery(ands[i])); } // If it doesn't. Push the word to `andPath`. @@ -302,5 +352,7 @@ module.exports = { removeDoubleWhiteSpace: removeDoubleWhiteSpace, removeOuterBrackets: removeOuterBrackets, parseBooleanQuery: parseBooleanQuery, - containsBrackets: containsBrackets + containsBrackets: containsBrackets, + escapeCharactersInQuotes: escapeCharactersInQuotes, + unescapeCharactersInQuotes: unescapeCharactersInQuotes }; diff --git a/test/test.js b/test/test.js index 2821ee0..a06c24a 100644 --- a/test/test.js +++ b/test/test.js @@ -89,7 +89,42 @@ describe('String functions', function() { assert.equal('a b c', bparser.removeDoubleWhiteSpace("a\nb\tc")); }); }); + + describe('escapeCharactersInQuotes', function() { + it('Should modify inside quotes to have no spaces', function() { + assert.equal('a %22b%20c%22', bparser.escapeCharactersInQuotes('a "b c"')); + }); + + it('Should modify inside quotes to have no parenthesis', function() { + assert.equal('a %22(b-c)%22', bparser.escapeCharactersInQuotes('a "(b-c)"')); + }); + + it('Should modify multiple sets of quotes', function() { + assert.equal('a %22b%20c%22 d %22e%20f%22 g', bparser.escapeCharactersInQuotes('a "b c" d "e f" g')); + }); + + it('Should ignore dangling quotes', function() { + assert.equal('a "b c', bparser.escapeCharactersInQuotes('a "b c')); + }); + }); + + describe('unescapeCharactersInQuotes', function() { + it('Should restore spaces in terms', function() { + assert.deepEqual([['a'],['b c']], bparser.unescapeCharactersInQuotes([['a'],['%22b%20c%22']])); + }); + + it('Should restore parenthesis in terms', function() { + assert.deepEqual([['a'],['(b-c)']], bparser.unescapeCharactersInQuotes([['a'],['%22(b-c)%22']])); + }); + it('Should restore multiple sets of quotes', function() { + assert.deepEqual([['a'],['b c'],['d'],['e f'],['g']], bparser.unescapeCharactersInQuotes([['a'],['%22b%20c%22'],['d'],['%22e%20f%22'],['g']])); + }); + + it('Should ignore dangling quotes', function() { + assert.deepEqual([['a'],['"b'],['c']], bparser.unescapeCharactersInQuotes([['a'],['"b'],['c']])); + }); + }); }); describe('query merging functions', function() { @@ -189,6 +224,12 @@ describe('parse function', function() { it('Should parse a simple query a single depth of brackets', function() { assert.deepEqual([['a', 'c'], ['b', 'c']], bparser.parseBooleanQuery('(a OR b) AND c')); }); + it('Should parse a simple query a query with quoted terms', function() { + assert.deepEqual([['a', 'c'], ['b', 'c']], bparser.parseBooleanQuery('("a" OR b) AND c')); + }); + it('Should parse a more complex query a query with quoted terms', function() { + assert.deepEqual([['a b', 'e f'], ['c', 'e f']], bparser.parseBooleanQuery('("a b" OR c) AND "e f"')); + }); // This resolves to issue #3 on github it('Should parse a query, where the final bracket is not related to the first bracket', function() { @@ -220,5 +261,19 @@ describe('parse function', function() { recursiveSort(bparser.parseBooleanQuery(searchPhrase)) ); }); + it('..long shot with quotes', function(){ + var searchPhrase = '(("a " AND ("(b" OR "c)")) AND ("d AND" AND "e OR") AND ("(f)" OR g OR h)) OR i OR j'; + assert.deepEqual( + recursiveSort( + [['a ','(b','d AND','e OR','(f)'], + ['a ','(b','d AND','e OR','g'], + ['a ','(b','d AND','e OR','h'], + ['a ','c)','d AND','e OR','(f)'], + ['a ','c)','d AND','e OR','g'], + ['a ','c)','d AND','e OR','h'], + ['i'],['j']]), + recursiveSort(bparser.parseBooleanQuery(searchPhrase)) + ); + }); }); From 2446ec7602f66d7d6a97636d179421ed198569fb Mon Sep 17 00:00:00 2001 From: Peter H Date: Thu, 8 Feb 2018 21:20:53 -0500 Subject: [PATCH 2/4] removed references to termSet --- index.js | 4 ---- 1 file changed, 4 deletions(-) diff --git a/index.js b/index.js index 7197728..2f19755 100644 --- a/index.js +++ b/index.js @@ -49,7 +49,6 @@ function escapeCharactersInQuotes(searchPhrase){ function unescapeCharactersInQuotes(permutations){ var decodedPermutations = []; - var termSet = new Set(); permutations.forEach(function(element){ var decodedElement = []; @@ -59,15 +58,12 @@ function unescapeCharactersInQuotes(permutations){ decoded = decoded.replace(/)/g, ')'); // restore spaces that may have been encoded decoded = decodeURI(decoded); - // strip leading NOT for termSet - var searchTerm = decoded; // strip off quotes decoded = decoded.replace(/^"(.*)"$/, function(match, group1, offset, original){ return group1; }); decodedElement.push(decoded); - termSet.add(searchTerm); }); decodedPermutations.push(decodedElement); }); From 634d08f84e76cc6635595ffa5a92fe3a6e650a9d Mon Sep 17 00:00:00 2001 From: Peter H Date: Thu, 8 Feb 2018 22:38:24 -0500 Subject: [PATCH 3/4] Implemented a default split term, fixing issue #4 --- index.js | 30 +++++++++++++++++++++++++++++- test/test.js | 24 ++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index 2f19755..22af1a3 100644 --- a/index.js +++ b/index.js @@ -35,6 +35,29 @@ function parseBooleanQuery(searchPhrase) { return permutations; } +//var defaultSplitTerm = 'AND'; + +function injectOperatorBetweenTerms(searchPhrase) { + // Default to using AND + useAnd = (module.exports.defaultSplitTerm == 'AND'); + + // Remove leading and trailing whitespace + searchPhrase = searchPhrase.trim(); + + if(useAnd){ + // replace all spaces with ' AND ', then remove any extra ANDs + searchPhrase = searchPhrase.replace(/ /gi, ' AND '); + searchPhrase = searchPhrase.replace(/ AND AND AND /gi, ' AND '); + searchPhrase = searchPhrase.replace(/ AND OR AND /gi, ' OR '); + } else { + // replace all spaces with ' OR ', then remove any extra ORs + searchPhrase = searchPhrase.replace(/ /gi, ' OR '); + searchPhrase = searchPhrase.replace(/ OR AND OR /gi, ' AND '); + searchPhrase = searchPhrase.replace(/ OR OR OR /gi, ' OR '); + } + return searchPhrase; +} + function escapeCharactersInQuotes(searchPhrase){ searchPhrase = searchPhrase.replace(/(".+?")/g, function(match, group1, offset, input_string) { // remove spaces @@ -88,6 +111,9 @@ function _parseBooleanQuery(searchPhrase) { // remove double whitespaces searchPhrase = removeDoubleWhiteSpace(searchPhrase); + // Put ANDs inbetween all the terms that only have a space betwee them + searchPhrase = injectOperatorBetweenTerms(searchPhrase); + // Split the phrase on the term 'OR', but don't do this on 'OR' that's in // between brackets. EX: a OR (b OR c) should not parse the `OR` in between b // and c. @@ -350,5 +376,7 @@ module.exports = { parseBooleanQuery: parseBooleanQuery, containsBrackets: containsBrackets, escapeCharactersInQuotes: escapeCharactersInQuotes, - unescapeCharactersInQuotes: unescapeCharactersInQuotes + unescapeCharactersInQuotes: unescapeCharactersInQuotes, + injectOperatorBetweenTerms: injectOperatorBetweenTerms, + defaultSplitTerm: 'AND' }; diff --git a/test/test.js b/test/test.js index a06c24a..fe31440 100644 --- a/test/test.js +++ b/test/test.js @@ -125,6 +125,25 @@ describe('String functions', function() { assert.deepEqual([['a'],['"b'],['c']], bparser.unescapeCharactersInQuotes([['a'],['"b'],['c']])); }); }); + + describe('injectOperatorBetweenTerms()', function() { + it('should add in additional ANDs to the searchPhrase by default', function() { + assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a b')); + assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b')); + assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b')); + }); + + it('should add in ORs to the searchPhrase, if specified', function() { + // Save off the old split term and override it to 'OR' + var oldSplitTerm = bparser.defaultSplitTerm; + bparser.defaultSplitTerm = 'OR'; + assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a b')); + assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b')); + assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b')); + // Restore the old split term + bparser.defaultSplitTerm = oldSplitTerm; + }); + }); }); describe('query merging functions', function() { @@ -212,6 +231,11 @@ describe('query merging functions', function() { }); describe('parse function', function() { + it('Should parse a simple query without an operator', function() { + assert.deepEqual([['a', 'b']], bparser.parseBooleanQuery('a b')); + assert.deepEqual([['a', 'b','c']], bparser.parseBooleanQuery('a AND b c')); + assert.deepEqual([['a','b c']], bparser.parseBooleanQuery('a "b c"')); + }); it('Should parse a simple query without any brackets', function() { assert.deepEqual([['a', 'b']], bparser.parseBooleanQuery('a AND b')); assert.deepEqual([['a'], ['b']], bparser.parseBooleanQuery('a OR b')); From 57c7cf3025245a5f3c67e63b40bf2c38c2143978 Mon Sep 17 00:00:00 2001 From: Peter H Date: Fri, 9 Feb 2018 11:22:42 -0500 Subject: [PATCH 4/4] fixed handling of spaces on the inside of parenthesis for default split term --- index.js | 4 ++++ test/test.js | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/index.js b/index.js index 22af1a3..cf331e3 100644 --- a/index.js +++ b/index.js @@ -49,11 +49,15 @@ function injectOperatorBetweenTerms(searchPhrase) { searchPhrase = searchPhrase.replace(/ /gi, ' AND '); searchPhrase = searchPhrase.replace(/ AND AND AND /gi, ' AND '); searchPhrase = searchPhrase.replace(/ AND OR AND /gi, ' OR '); + searchPhrase = searchPhrase.replace(/\( AND /gi, '('); + searchPhrase = searchPhrase.replace(/ AND \)/gi, ')'); } else { // replace all spaces with ' OR ', then remove any extra ORs searchPhrase = searchPhrase.replace(/ /gi, ' OR '); searchPhrase = searchPhrase.replace(/ OR AND OR /gi, ' AND '); searchPhrase = searchPhrase.replace(/ OR OR OR /gi, ' OR '); + searchPhrase = searchPhrase.replace(/\( OR /gi, '('); + searchPhrase = searchPhrase.replace(/ OR \)/gi, ')'); } return searchPhrase; } diff --git a/test/test.js b/test/test.js index fe31440..ddddde1 100644 --- a/test/test.js +++ b/test/test.js @@ -131,6 +131,8 @@ describe('String functions', function() { assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a b')); assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b')); assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b')); + assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms(' ( ( a AND ( b OR c ) ) AND ( d AND e ) AND ( f OR g OR h ) ) OR i OR j ')); + assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms('((a ( b OR c)) (d e) (f OR g OR h)) OR i OR j')); }); it('should add in ORs to the searchPhrase, if specified', function() { @@ -140,6 +142,8 @@ describe('String functions', function() { assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a b')); assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b')); assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b')); + assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms(' ( ( a AND ( b OR c ) ) AND ( d AND e ) AND ( f OR g OR h ) ) OR i OR j ')); + assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms('((a AND (b c)) AND (d AND e) AND (f g h)) i j')); // Restore the old split term bparser.defaultSplitTerm = oldSplitTerm; });