Add quoted-phrase terms and an implicit operator between adjacent terms.

- README.md: document the implicit operator (`a b`) and quoted phrases.
- index.js: parseBooleanQuery() now escapes quoted phrases, delegates to
  _parseBooleanQuery() and unescapes the resulting terms again;
  injectOperatorBetweenTerms() joins space-separated terms using
  module.exports.defaultSplitTerm.
- test/test.js: cover the three new helpers and quoted queries.

NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been lost. The indentation of context lines and the position
of blank context lines are best guesses -- confirm against the target files
(or apply with --ignore-whitespace). The index hashes are the original ones.

diff --git a/README.md b/README.md
index 0243fff..c7ec142 100644
--- a/README.md
+++ b/README.md
@@ -9,13 +9,16 @@ This function converts a boolean query to a 2 dimensional array with all possibi
 Input | Output
 -------- | ---------
 `a AND b` | `[[a, b]]`
+`a b` | `[[a, b]]`
 `a OR b` | `[[a], [b]]`
 `a AND b AND c` | `[[a, b, c]]`
 `a AND b OR c` | `[[a, b], [c]]`
 `a AND (b OR c)` | `[[a, b], [a, c]]`
 `a AND (b OR c) AND (d OR e)` | `[[a, b, d], [a, b, e], [a, c, d], [a, c, e]]`
 
-Whereas `a`, `b` and `c` represent words, forming a complex query pattern.
+Whereas `a`, `b` and `c` represent terms, forming a complex query pattern.
+
+A term is either a single word or a phrase of several words wrapped in double quotes (e.g. term `a` could be `"foo bar"`).
 
 This function works recursively trough all brackets and generates an array of all possible combinations of a matching query.
 
diff --git a/index.js b/index.js
index 162c77e..cf331e3 100644
--- a/index.js
+++ b/index.js
@@ -24,6 +24,79 @@ function _arraysAreEqual(arrA, arrB) {
   return true;
 }
 
+function parseBooleanQuery(searchPhrase) {
+  // Hide spaces and brackets inside "quoted phrases" from the parser.
+  searchPhrase = escapeCharactersInQuotes(searchPhrase);
+
+  var permutations = _parseBooleanQuery(searchPhrase);
+  // Decode the terms again and strip the surrounding quotes.
+  permutations = unescapeCharactersInQuotes(permutations);
+
+  return permutations;
+}
+
+// Insert the implicit operator between terms that are separated only by a
+// space. `module.exports.defaultSplitTerm` selects it: 'AND', otherwise OR.
+function injectOperatorBetweenTerms(searchPhrase) {
+  // AND is used only when defaultSplitTerm is 'AND'; any other value means OR
+  var useAnd = (module.exports.defaultSplitTerm === 'AND');
+
+  // Remove leading and trailing whitespace
+  searchPhrase = searchPhrase.trim();
+
+  if(useAnd){
+    // replace all spaces with ' AND ', then remove any extra ANDs
+    searchPhrase = searchPhrase.replace(/ /gi, ' AND ');
+    searchPhrase = searchPhrase.replace(/ AND AND AND /gi, ' AND ');
+    searchPhrase = searchPhrase.replace(/ AND OR AND /gi, ' OR ');
+    searchPhrase = searchPhrase.replace(/\( AND /gi, '(');
+    searchPhrase = searchPhrase.replace(/ AND \)/gi, ')');
+  } else {
+    // replace all spaces with ' OR ', then remove any extra ORs
+    searchPhrase = searchPhrase.replace(/ /gi, ' OR ');
+    searchPhrase = searchPhrase.replace(/ OR AND OR /gi, ' AND ');
+    searchPhrase = searchPhrase.replace(/ OR OR OR /gi, ' OR ');
+    searchPhrase = searchPhrase.replace(/\( OR /gi, '(');
+    searchPhrase = searchPhrase.replace(/ OR \)/gi, ')');
+  }
+  return searchPhrase;
+}
+
+function escapeCharactersInQuotes(searchPhrase){
+  searchPhrase = searchPhrase.replace(/(".+?")/g, function(match, group1, offset, input_string) {
+    // percent-encode the phrase: the quotes become %22 and spaces %20
+    var encoded = encodeURI(group1.trim());
+    // encodeURI leaves brackets untouched, so hide them as HTML entities
+    encoded = encoded.replace(/\(/g, '&#40;');
+    encoded = encoded.replace(/\)/g, '&#41;');
+    return encoded;
+  });
+  return searchPhrase;
+}
+
+function unescapeCharactersInQuotes(permutations){
+  var decodedPermutations = [];
+
+  permutations.forEach(function(element){
+    var decodedElement = [];
+    element.forEach(function(term){
+      // restore parenthesis that may have been encoded
+      var decoded = term.replace(/&#40;/g, '(');
+      decoded = decoded.replace(/&#41;/g, ')');
+      // restore spaces that may have been encoded
+      try { decoded = decodeURI(decoded); } catch (e) { /* stray '%' in an unquoted term: keep it as typed */ }
+
+      // strip off quotes
+      decoded = decoded.replace(/^"(.*)"$/, function(match, group1, offset, original){
+        return group1;
+      });
+      decodedElement.push(decoded);
+    });
+    decodedPermutations.push(decodedElement);
+  });
+  return decodedPermutations;
+}
+
 // This function converts a boolean query to a 2 dimensional array.
 // a AND (b OR c)
 // Becomes:
@@ -34,7 +107,7 @@ function _arraysAreEqual(arrA, arrB) {
 // There are more efficient ways to match content to this query, though this is
 // the one that is most easy to maintain and limits risk of side-effects.
 // Especially when considering recursively nested queries.
-function parseBooleanQuery(searchPhrase) {
+function _parseBooleanQuery(searchPhrase) {
 
   // Remove outer brackets if they exist. EX: (a OR b) -> a OR b
   searchPhrase = removeOuterBrackets(searchPhrase);
@@ -42,6 +115,9 @@ function parseBooleanQuery(searchPhrase) {
   // remove double whitespaces
   searchPhrase = removeDoubleWhiteSpace(searchPhrase);
 
+  // Put the default operator between all terms that only have a space between them
+  searchPhrase = injectOperatorBetweenTerms(searchPhrase);
+
   // Split the phrase on the term 'OR', but don't do this on 'OR' that's in
   // between brackets. EX: a OR (b OR c) should not parse the `OR` in between b
   // and c.
@@ -68,7 +144,7 @@ function parseBooleanQuery(searchPhrase) {
       // If the string contains brackets, parse it recursively, and add it to
       // `nestedPaths`.
       if (containsBrackets(ands[i])) {
-        nestedPaths.push(parseBooleanQuery(ands[i]));
+        nestedPaths.push(_parseBooleanQuery(ands[i]));
       }
 
       // If it doesn't. Push the word to `andPath`.
@@ -302,5 +378,9 @@ module.exports = {
   removeDoubleWhiteSpace: removeDoubleWhiteSpace,
   removeOuterBrackets: removeOuterBrackets,
   parseBooleanQuery: parseBooleanQuery,
-  containsBrackets: containsBrackets
+  containsBrackets: containsBrackets,
+  escapeCharactersInQuotes: escapeCharactersInQuotes,
+  unescapeCharactersInQuotes: unescapeCharactersInQuotes,
+  injectOperatorBetweenTerms: injectOperatorBetweenTerms,
+  defaultSplitTerm: 'AND'
 };
diff --git a/test/test.js b/test/test.js
index 2821ee0..ddddde1 100644
--- a/test/test.js
+++ b/test/test.js
@@ -89,7 +89,65 @@ describe('String functions', function() {
       assert.equal('a b c', bparser.removeDoubleWhiteSpace("a\nb\tc"));
     });
   });
+
+  describe('escapeCharactersInQuotes', function() {
+    it('Should modify inside quotes to have no spaces', function() {
+      assert.equal('a %22b%20c%22', bparser.escapeCharactersInQuotes('a "b c"'));
+    });
+
+    it('Should modify inside quotes to have no parenthesis', function() {
+      assert.equal('a %22&#40;b-c&#41;%22', bparser.escapeCharactersInQuotes('a "(b-c)"'));
+    });
+
+    it('Should modify multiple sets of quotes', function() {
+      assert.equal('a %22b%20c%22 d %22e%20f%22 g', bparser.escapeCharactersInQuotes('a "b c" d "e f" g'));
+    });
+
+    it('Should ignore dangling quotes', function() {
+      assert.equal('a "b c', bparser.escapeCharactersInQuotes('a "b c'));
+    });
+  });
+
+  describe('unescapeCharactersInQuotes', function() {
+    it('Should restore spaces in terms', function() {
+      assert.deepEqual([['a'],['b c']], bparser.unescapeCharactersInQuotes([['a'],['%22b%20c%22']]));
+    });
+
+    it('Should restore parenthesis in terms', function() {
+      assert.deepEqual([['a'],['(b-c)']], bparser.unescapeCharactersInQuotes([['a'],['%22&#40;b-c&#41;%22']]));
+    });
+
+    it('Should restore multiple sets of quotes', function() {
+      assert.deepEqual([['a'],['b c'],['d'],['e f'],['g']], bparser.unescapeCharactersInQuotes([['a'],['%22b%20c%22'],['d'],['%22e%20f%22'],['g']]));
+    });
+
+    it('Should ignore dangling quotes', function() {
+      assert.deepEqual([['a'],['"b'],['c']], bparser.unescapeCharactersInQuotes([['a'],['"b'],['c']]));
+    });
+  });
+
+  describe('injectOperatorBetweenTerms()', function() {
+    it('should add in additional ANDs to the searchPhrase by default', function() {
+      assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a b'));
+      assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b'));
+      assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b'));
+      assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms(' ( ( a AND ( b OR c ) ) AND ( d AND e ) AND ( f OR g OR h ) ) OR i OR j '));
+      assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms('((a ( b OR c)) (d e) (f OR g OR h)) OR i OR j'));
+    });
 
+    it('should add in ORs to the searchPhrase, if specified', function() {
+      // Save off the old split term and override it to 'OR'
+      var oldSplitTerm = bparser.defaultSplitTerm;
+      bparser.defaultSplitTerm = 'OR';
+      assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a b'));
+      assert.equal('a AND b', bparser.injectOperatorBetweenTerms('a AND b'));
+      assert.equal('a OR b', bparser.injectOperatorBetweenTerms('a OR b'));
+      assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms(' ( ( a AND ( b OR c ) ) AND ( d AND e ) AND ( f OR g OR h ) ) OR i OR j '));
+      assert.equal('((a AND (b OR c)) AND (d AND e) AND (f OR g OR h)) OR i OR j', bparser.injectOperatorBetweenTerms('((a AND (b c)) AND (d AND e) AND (f g h)) i j'));
+      // Restore the old split term
+      bparser.defaultSplitTerm = oldSplitTerm;
+    });
+  });
 });
 
 describe('query merging functions', function() {
@@ -177,6 +235,11 @@ describe('query merging functions', function() {
 });
 
 describe('parse function', function() {
+  it('Should parse a simple query without an operator', function() {
+    assert.deepEqual([['a', 'b']], bparser.parseBooleanQuery('a b'));
+    assert.deepEqual([['a', 'b','c']], bparser.parseBooleanQuery('a AND b c'));
+    assert.deepEqual([['a','b c']], bparser.parseBooleanQuery('a "b c"'));
+  });
   it('Should parse a simple query without any brackets', function() {
     assert.deepEqual([['a', 'b']], bparser.parseBooleanQuery('a AND b'));
     assert.deepEqual([['a'], ['b']], bparser.parseBooleanQuery('a OR b'));
@@ -189,6 +252,12 @@ describe('parse function', function() {
   it('Should parse a simple query a single depth of brackets', function() {
     assert.deepEqual([['a', 'c'], ['b', 'c']], bparser.parseBooleanQuery('(a OR b) AND c'));
   });
+  it('Should parse a simple query with quoted terms', function() {
+    assert.deepEqual([['a', 'c'], ['b', 'c']], bparser.parseBooleanQuery('("a" OR b) AND c'));
+  });
+  it('Should parse a more complex query with quoted terms', function() {
+    assert.deepEqual([['a b', 'e f'], ['c', 'e f']], bparser.parseBooleanQuery('("a b" OR c) AND "e f"'));
+  });
 
   // This resolves to issue #3 on github
   it('Should parse a query, where the final bracket is not related to the first bracket', function() {
@@ -220,5 +289,19 @@ describe('parse function', function() {
       recursiveSort(bparser.parseBooleanQuery(searchPhrase))
     );
   });
+  it('..long shot with quotes', function(){
+    var searchPhrase = '(("a " AND ("(b" OR "c)")) AND ("d AND" AND "e OR") AND ("(f)" OR g OR h)) OR i OR j';
+    assert.deepEqual(
+      recursiveSort(
+        [['a ','(b','d AND','e OR','(f)'],
+        ['a ','(b','d AND','e OR','g'],
+        ['a ','(b','d AND','e OR','h'],
+        ['a ','c)','d AND','e OR','(f)'],
+        ['a ','c)','d AND','e OR','g'],
+        ['a ','c)','d AND','e OR','h'],
+        ['i'],['j']]),
+      recursiveSort(bparser.parseBooleanQuery(searchPhrase))
+    );
+  });
 
 });