Improving combinations from abc[d[e,f],gh] pattern algorithm
Asked Answered
T

5

5

I wrote an algorithm that is inadequate, namely because it does not handle [,abc] cases (see the string variations and conditions below), and would like to know how it can be improved so it covers those cases:

Given

Pattern, that describes strings variations: abc[de[f,g],hk], which gives

abcdef
abcdeg
abchk

Pattern consists of "arrays", that followed by strings: abc[...], and strings adj,kg,q

Another possible more complex example: utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]].

Conditions

  1. Strings itself can contain only letters and numbers. There couldn't be abc[h\,k,b] or abc[h\[k,b] that gives abch,k or abch[k.
  2. "Arrays" always not empty, and has at least 2 elements.
  3. There can be any order of "array", or "only string" value, i.e.: abc[a,b[c,d]] or abc[a[b,c],d]. The order is strict from left to right, there can not be from pattern abc[d,e] combinations eabc or dabc.
  4. abc[d,e] doesn't gives abcde nor abced string, only abcd and abce.
  5. Pattern always starts with string with array: something[...].
  6. There can be string without array: abc[a,bc[d,f]], but array without string is not allowed: abc[a,[d,f]].
  7. There can be an empty string, i.e.: a[,b], that gives a and ab

My solution

function getStrings(pat) {
    if(pat.indexOf('[') == -1)
    return pat;

        String.prototype.insert = function(index, string) {
        if (index > 0) {
            return this.substring(0, index) + string + this.substr(index);
        }
    
        return string + this;
        };
    
        function getArray(str, start, isSource = false) {
        if (start < 0) return null;
    
        var n = 0;
        var ret = "";
        var i = start;
    
        for (; i < str.length; i++) {
            if (str[i] == "[") n++;
            else if (str[i] == "]") n--;
    
            if (n == 0) break;
        }
    
        var ret = {
            str: "",
            arr: "",
            end: 0,
        };
        ret.arr = str.slice(start, i) + "]";
        ret.end = i;
    
        start--;
        var end = start;
        for (
            ;
            start > 0 &&
            str[start] != "," &&
            str[start] != "]" &&
            str[start] != "[";
            start--
        ) {}
    
        if(!isSource)
        start++;
        end++;
    
        ret.str = str.slice(start, end);
    
        return ret;
        }
    
        function getElement(source, start) {
        var ret = [];
        start++;
    
        for (
            ;
            start < source.length && source[start] != "," && source[start] != "]";
            start++
        )
            ret[ret.length] = source[start];
    
        return ret;
        }
    
        var source = getArray(pat, pat.indexOf("["), true); // parsing
    
        var ar = source.arr;
    
        source.arrs = getArrays(source); // parsing
        source.source = true;
        
    
        var fi = "";
        var temp_out = [];
        var tol = 0;
    
        return getVariations(source); // getting variations of parsed
    
    
        function getVariations(source) {
        if (source.arrs == undefined) {
        } else
            for (var i = 0; i < source.arrs.length; i++) {
            if (source.source) fi = source.str;
    
            if (!source.arrs[i].arrs) {
                temp_out[tol] = fi + source.arrs[i].str;
                tol++;
            } else {
                var lafi = fi;
                fi += source.arrs[i].str;
    
                getVariations(source.arrs[i]);
                
                if(i != source.arrs.length - 1)
                fi = lafi;
            }
    
            if (source.source && i == source.arrs.length - 1) {
                var temp = temp_out;
                temp_out = [];
                tol = 0;
                return temp;
            }
            }
        }
    
        function getArrays(source) {
        var la = 1;
        var start = 0;
        var arrs = [];
    
        if (!source.arr) return;
    
        while (start != -1) {
            start = source.arr.indexOf("[", la);
            var qstart = source.arr.indexOf(",", la);
            if(source.arr[la] == ',')
            qstart = source.arr.indexOf(",", la+1);
    
            var pu = false;
    
    
            if(qstart != la && qstart != -1 && qstart < start && start != -1)
            {
            pu = true;
            var str = source.arr;
            var buf = [];
            qstart--;
            var i = -1;
    
            for(i = qstart; i > 0 && str[i] != '[' && str[i] != ','; i--)
            {}
            i++;
    
            for(; i < str.length && str[i]!= ','; i++)
            {
                buf[buf.length] = str[i];
            }
    
            if(buf.length == 0)
            {
                la = start;
                alert("1!")
            }
            else
            {
                
                buf = buf.join('');
                arrs[arrs.length] = {str:buf};
                la += buf.length+1;
            }
            }
            else
            if (start != -1) {
            arrs[arrs.length] = getArray(source.arr, start);
            la = arrs[arrs.length - 1].end + 1;
            } else {
            
            start = source.arr.indexOf(",", la);
            if (start != -1) {
                var ret = getElement(source.arr, start);
                arrs[arrs.length] = ret;
                la += ret.length;
            }
            }
        }
    
    
        for (var i = 0; i < arrs.length; i++)
            if (typeof arrs[i] != "string" && arrs[i].arr) {
            arrs[i].arrs = getArrays(arrs[i]);
            var st = arrs[i].arr;
    
            if (occ(arrs[i].arr, "[") == 1 && occ(arrs[i].arr, "]") == 1) {
                st = st.replaceAll("[", '["');
                st = st.replaceAll("]", '"]');
                st = st.replaceAll(",", '","');
                st = JSON.parse(st);
    
                for (var j = 0; j < st.length; j++) st[j] = { str: st[j] };
                arrs[i].arrs = st;
            }
            } else if (typeof arrs[i] == "string") {
            arrs[i] = { str: arrs[i] };
            }
    
        RecursArrs(arrs);
    
        return arrs;
        }
    
        function RecursArrs(arrs) {
        for (var i = 0; i < arrs.length; i++) {
            if (!arrs[i].source)
            if (arrs[i].arr) {
                delete arrs[i].arr;
                delete arrs[i].end;
            }
            if (!arrs[i].str) {
                try{
            arrs[i] = { str: arrs[i].join("") };
                }catch(er)
                {
                    arrs[i] = {str:''};
                }
            if (i && arrs[i - 1].str == arrs[i].str) {
                arrs.splice(i, 1);
                i--;
            }
            } else if (arrs[i].arrs) RecursArrs(arrs[i].arrs);
        }
        }
    
        function occ(string, word) {
        return string.split(word).length - 1;
        }
    
}

// getStrings('IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]')
Taurine answered 26/8, 2022 at 7:55 Comment(14)
Is it allowed that a ] is followed by a letter or by a [?Equalizer
@trincot, can You please provide an example what You meanTaurine
Example: a[b,c]d or a[b,c][d,e]Equalizer
@trincot, no elements always separated by comma - a[b,c],d, a[b,c],[d,e]. But there are two important things I didn't mentioned in the post, wait a minuteTaurine
OK clear. One more thing. In the code you have an example that has R[,G[A,E,I]]. Does the immediate comma mean that R is a possible outcome? Does that mean that also ,, would be allowed or ,] -- with the same purpose?Equalizer
@trincot, I actually didn't notice that, but this a real pattern I grabbed from my work. I think it should be skipped in that case, i.e. considered like there there just no [,... stuff. Probably just pat = pat.replaceAll('[,','[').replaceAll(',]',']').replaceAll(',,',',') at the beginningTaurine
I hope you are sure about that. It would make sense to keep it, as it denotes an empty string as a possibility, and would give an extra possible output than when you remove that comma. It would not have the same meaning. So for example a[,b] would allow a and ab as outputs, while a[b] would only allow ab as output. Can you confirm what you want?Equalizer
@trincot, hmm actually it also could be the case, yes, let's keep it and consider as You describedTaurine
I just noticed that the pattern in your code has more opening brackets than closing. Surely that ain't right. Can you check?Equalizer
@tincot, yeah, just missed one, when copied. Updated postTaurine
How do you feel your algorithm falls short, specifically, in objective metrics? Are you concerned about runtime, about the number of edge cases that might fail, about lines of code?Piotrowski
@TylerH, sorry my English is pretty bad, and after third time reread Your comment, I didn't catch it. The algorithm I wrote is probably worse than anyone's others, I didn't write that it is good, but it is mine, and it is important.Taurine
@Taurine OK, let me try again. Why, specifically, do you think your algorithm is "probably worse than anyone else's"? Stack Overflow is not for opinions or gut feelings, but for answering objective questions/problems.Piotrowski
@TylerH, at least because it didn't handle [,abc] cases, which I just have not been noticing before trincot pointed thatTaurine
E
3

I would use a regular expression to break up the input into tokens. In this case I chose to take pairs of (letters, delimiter), where the delimiter is one of "[", "]", ",". The letters part could be empty.

Then I would use a recursive function like you did, but I went for a recursive generator function.

Here is the suggested implementation:

function* getStrings(pattern) {
    const tokens = pattern.matchAll(/([^[\],]*)([[\],])?/g);
    
    function* dfs(recur=false) {
        let expectToken = true;
        while (true) {
            const [, token, delim] = tokens.next().value;
            if (delim === "[") {
                for (const deep of dfs(true)) yield token + deep;
            } else {
                if (token || expectToken) yield token;
                if (delim === "]" && !recur) throw "Invalid pattern: too many `]`";
                if (!delim && recur) throw "Invalid pattern: missing `]`";
                if (delim !== ",") return;
            }
            expectToken = delim !== "["; // After [...] we don't expect a letter
        }
    }
    yield* dfs();
}


const input = 'IE5E[COR[R[,G[A,E,I]],S,T,U,V,W,X,Y,Z],EORRG[I,M]]';
for (const s of getStrings(input))
    console.log(s);

This implementation should match the patterns according to the given restrictions, but it will also allow the following:

  • An "array" can start without a prefix of letters. So [a,b] is allowed and will produce the same output as a,b.
  • An "array" may be followed immediately by letters or a new "array", but this will be interpreted as if they were separated by a comma. So x[a,b]c will be interpreted as x[a,b],c
  • An "array" can be empty. In that case the array is ignored. So x[] is the same as x.

There is some basic error checking: an error will be generated when the brackets are not balanced.

Equalizer answered 26/8, 2022 at 10:30 Comment(3)
Thank You, what do asterisks stand for?Taurine
function* is the syntax for a generator function, so that it can use yield. yield* is a shortcut for yielding all values from an iterable.Equalizer
The performance testing I ran online places this answer between 20 and 130 times faster than all the others (cc @Ngdgvcb).Tessietessier
J
3

We can do this in an inside-out fashion. If we replace the innermost group (e.g. 'de[fg]' with its expansion, 'def,deg', and recur until there are no more groups remaining, we will have created a comma-separated list of final strings, which we can simply split apart and return.

const _expand = (
  s, 
  match = s .match (/(.*?)(\w*)\[([^\[\]]+)\](.*)/), 
  [_, a, b, c, d] = match || []
) => match ? _expand (a + c .split (',') .map (x => b + x) .join (',') + d) : s

const expand = (s) => _expand (s) .split (',')

console .log (expand ('abc[de[f,g],hk]'))
console .log (expand ('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'))
.as-console-wrapper {max-height: 100% !important; top: 0}

Our main recursive function -- _expand -- uses a regular expression that extracts the first group, and breaks it into constituent parts, and puts it back together by mapping over the parts of the array. Then our public function, expand simply calls the recursive one and splits the result into an array.

For example, this is how the recursive calls would be handled for the string, 'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]':

'utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'    //-->
//        ^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,lo[z,x]],bm]]'  //-->
//                              ^^^^^^^  
'utvk[fvu,gnu,gnk,gnr,nl,q[t[ij,loz,lox],bm]]'  //-->
//                         ^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,q[tij,tloz,tlox,bm]]'  //-->
//                       ^^^^^^^^^^^^^^^^^^^
'utvk[fvu,gnu,gnk,gnr,nl,qtij,qtloz,qtlox,qbm]' //-->
//   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
'utvkfvu,utvkgnu,utvkgnk,utvkgnr,utvknl,utvkqtij,utvkqtloz,utvkqtlox,utvkqbm'

Update: Regex explanation:

The regex used here can be broken down into six sections:

  • (.*?): captures (non-greedy) an initial set of characters, stored as a
  • (\w*): captures our letters before an opening brace, stored as b
  • \[: captures an opening brace ([)
  • ([^\[\]]+): captures everything but braces ([ or ]), stored as c
  • \]: captures a closing brace (])
  • (.*): captures everything after the closing brace, stored as d

The point is for the group inside the braces to include no other braces. An example might look like this:

    utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]
   `----+---'\/|`-+-'|`----------+-----------'
        \     | \  \  \__         \
         |    \  \_ \__  \____     \
a:     (.*?)   \_  \_  \       \    \
       ~~~~~     |   \  \__     \    \
b:             (\w*)  |    \     \    \
               ~~~~~  |     \     \    \
[:                    \[     |     \    \
                      ~~     |      \    \
c:                      ([^\[\]]+)   \    \
                        ~~~~~~~~~~   |     |
]:                                   \]    |
                                     ~~    |
d:                                        (.*)
                                          ~~~~
Jennie answered 26/8, 2022 at 19:21 Comment(5)
Thanks, I think I need a lot of time to at least understand the regex pattern You providedTaurine
Updated with a regex explanation. Hope it helps.Jennie
Always impressed with the economy of your code! Quick question: Is there a benefit to the way _expand is defined with pre-calculated parameters rather than the traditional means of including these calculations in the function body? Or is this simply a matter of taste?Hornswoggle
@Trentium: it's a matter of taste. I prefer working as much as possible with expressions rather than statements. Defaulting parameters makes this simpler. You could alternatively return an IIFE or use a call helper. There is a potential downside to defaulted parameters, but that does not apply to internal functions like this one.Jennie
@ScottSauyet Looks like we can get away without recursion if you take a look at my vanilla answer!Lebron
L
2

Vanialla solution without recursion:

const expander = /([^,[\]]*?)\[([^[\]]*?)]/;

const parse = (fields) => {
  let result = fields;
  while (result.match(expander)) {
    result = result.replace(expander, (m, p1, p2) => p2.split(',').map((e) => `${p1}${e}`).join(','));
  }
  return result.split(',');
};

console.log(parse('abc[de[f,g],hk]'));
// => [ 'abcdef', 'abcdeg', 'abchk' ]
console.log(parse('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'));
// => [ 'utvkfvu', 'utvkgnu', 'utvkgnk', 'utvkgnr', 'utvknl', 'utvkqtij', 'utvkqtloz', 'utvkqtlox', 'utvkqbm' ]
.as-console-wrapper {max-height: 100% !important; top: 0}

Basically I just took the code from object-fields, which one could use as follows

// const objectFields = require('object-fields');

const parse = (input) => objectFields.split(input.replace(/\[/g, '(').replace(/]/g, ')')).map((e) => e.replace(/\./g, ''));

console.log(parse('abc[de[f,g],hk]'));
// => [ 'abcdef', 'abcdeg', 'abchk' ]
console.log(parse('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'));
// => [ 'utvkfvu', 'utvkgnu', 'utvkgnk', 'utvkgnr', 'utvknl', 'utvkqtij', 'utvkqtloz', 'utvkqtlox', 'utvkqbm' ]
.as-console-wrapper {max-height: 100% !important; top: 0}
<script src="https://bundle.run/[email protected]"></script>

Disclaimer: I'm the author of object-fields

Lebron answered 27/8, 2022 at 4:54 Comment(3)
Oh, Your code way more tinier than others! Pretty coolTaurine
While I see absolutely no reason to avoid recursion -- unless there's a performance or scaling problem -- this is a very nice version. And I learned something I didn't know about regex!Jennie
@ScottSauyet Haha, I'm obsessed with recursion free implementations! And I've leaned a few things from your answer as well hahaLebron
H
1

Let's describe an algorithm in words. Let's define word as a group of consecutive letters without a comma or bracket, which can also be an empty string. Then one way to think about this process is as a stack with two types of entries:

  1. A word.
  2. An opening bracket, [.

As we traverse the string,

(1) push words and opening brackets onto the stack, not commas.

(2a) when we reach a closing bracket, ], we start a list and keep popping the stack, adding words to that list until we pop an opening bracket from the stack. We then (2b) pop the next entry in the stack, which is the prefix for our current list, and (2c) push each entry from the list onto the stack with the prefix prepended.

Finally, return the stack.

Here's an implementation of the algorithm described above.

function f(s) {
  if (s.length == 0) {
    return [];
  }
  const stack = [""];
  let i = 0;
  while (i < s.length) {
    if (s[i] == "[") {
      i += 1;
      stack.push("[", "");
    } else if (s[i] == "]") {
      i += 1;
      const suffixes = [];
      while (true) {
        const word = stack.pop();
        if (word == "[") {
          const prefix = stack.pop();
          for (let j = suffixes.length - 1; j >= 0; j--) {
            stack.push(prefix + suffixes[j]);
          }
          break;
        } else {
          suffixes.push(word);
        }
      }
    } else if (s[i] == ",") {
      i += 1;
      stack.push("");
    } else {
      stack[stack.length - 1] += s[i];
      i += 1;
    }
  }
  return stack;
}

// Output

var s = "a[bp,c[,d]],b[yx,]"

console.log(s);
for (const w of f(s)) {
  console.log(w);
}

console.log("");

s = "abc[de[f,g],hk]"

console.log(s);
for (const w of f(s)) {
  console.log(w);
}
Harville answered 27/8, 2022 at 15:6 Comment(5)
I meant to write another solution on this model but got distracted. Thanks for posting it. Although there is far more code than some solutions, this is likely the most performant of the solutions posted so far.Jennie
@ScottSauyet actually, from my benchmarking online, it seems to come a FAR second after trincot's answer (I commented there).Tessietessier
@ScottSauyet (although the testing was only on one short string)Tessietessier
Ha! I'd forgotten Trincot's answer, having thought through mine and both of vincents, and consideration of a stack-based solution like yours. I'll have to look more closely at this generator version.Jennie
Really? Would have definitely thought this is fastest. Regex tend to be slow, however there is on one execution in that answerLebron
L
0

Here is a recursion free solution using object-scan.

This solution is probably more of academic interest since it uses library internals and I wrote it to satisfy my curiosity whether it could be done this way. Also serves as a head scratcher for @ScottSauyet - payback for his answer which took me a while to figure out =)

Anyways, enjoy!

.as-console-wrapper {max-height: 100% !important; top: 0}
<script type="module">
import objectScan from 'https://cdn.jsdelivr.net/npm/[email protected]/lib/index.min.js';
import { compile } from 'https://cdn.jsdelivr.net/npm/[email protected]/lib/core/compiler.js';

const parse = (input) => {
  const compiled = compile([input.replace(/\[/g, '.{').replace(/]/g, '}')], {});
  return objectScan(['++{children[*]}.value'], {
    filterFn: ({ parent }) => parent.children.length === 0,
    rtn: ({ parents }) => parents.filter((e) => !Array.isArray(e)).map(({ value }) => value).reverse().slice(1).join('')
  })(compiled);
};

console.log(parse('abc[de[f,g],hk]'));
// => [ 'abcdef', 'abcdeg', 'abchk' ]
console.log(parse('utvk[fvu,gn[u,k,r],nl,q[t[ij,lo[z,x]],bm]]'));
// => [ 'utvkfvu', 'utvkgnu', 'utvkgnk', 'utvkgnr', 'utvknl', 'utvkqtij', 'utvkqtloz', 'utvkqtlox', 'utvkqbm' ]
</script>

Disclaimer: I'm the author of object-scan

Lebron answered 27/8, 2022 at 4:45 Comment(2)
Been following object-scan for some time now, so not much of a head-scratcher, I'm afraid; but it's still interesting, and it's always worth seeing many alternatives. I don't think this gains anything over my recursive version or your similar while-loop one, but it's still great to see.Jennie
@ScottSauyet Haha, I tried! But good that you can understand it! I was worried noone would be able to 😅Lebron

© 2022 - 2024 — McMap. All rights reserved.