How can I determine all possible ways a subsequence can be removed from a sequence?
Asked Answered
Q

6

41

Given two sequences, A and B, how can I generate a list of all the possible ways that B can be removed from A?

For example, In JavaScript, if I had a function removeSubSeq taking two array arguments that did what I want, it would work as follows:

removeSubSeq([1,2,1,3,1,4,4], [1,4,4]) would return [ [2,1,3,1], [1,2,3,1], [1,2,1,3] ] because the 4s at the end would match, and there are three possible places for the 1 to match

removeSubSeq([8,6,4,4], [6,4,8]) would return [] because the second argument is not actually a subsequence

removeSubSeq([1,1,2], [1]) would return [ [1,2], [1,2] ] because there's two ways that the 1 could be removed, even though it results in duplicates

Quaint answered 21/8, 2016 at 3:19 Comment(2)
Added JavaScript code to my answer using LCS.Sass
I have added JavaScript implementation to my answer: https://mcmap.net/q/389067/-how-can-i-determine-all-possible-ways-a-subsequence-can-be-removed-from-a-sequenceEuphoria
I
18

This problem can be solved in O(n*m + r) time, where r is the total length of the results, using the classic longest common subsequence algorithm.

Once the table is made, as in Wikipedia's example, replace it with a list of the cells with a diagonal arrow that also have a value corresponding with their row. Now traverse backwards from each cell with a diagonal in the last row, accumulating the relevant index in the string and duplicating and splitting the accumulation such that each cell with a diagonal arrow will have a continuation to all cells with a diagonal in the preceding row that are to the left of it (store that count as well, as you build the matrix) and one less in value. When an accumulation reaches a zero cell, splice the accumulated indexes from the string and add that as a result.

(The arrows correspond with whether the LCS so far came from LCS(X[i-1],Y[j]) and/or LCS(X[i],Y[j-1]), or LCS(X[i-1],Y[j-1]), see the function definition.)

For example:

  0  a  g  b  a  b  c  c
0 0  0  0  0  0  0  0  0
a 0 ↖1  1  1 ↖1  1  1  1
b 0  1  1 ↖2  2 ↖2  2  2
c 0  1  1  2  2  2 ↖3 ↖3

JavaScript code:

function remove(arr,sub){
  var _arr = [];
  arr.forEach(function(v,i){ if (!sub.has(i)) _arr.push(arr[i]); });
  return _arr;
}

function f(arr,sub){
  var res = [],
      lcs = new Array(sub.length + 1),
      nodes = new Array(sub.length + 1);
     
  for (var i=0; i<sub.length+1;i++){
    nodes[i] = [];
    lcs[i] = [];
   
    for (var j=0; j<(i==0?arr.length+1:1); j++){
      // store lcs and node count on the left
      lcs[i][j] = [0,0];
    }
  }
 
  for (var i=1; i<sub.length+1;i++){ 
    for (var j=1; j<arr.length+1; j++){
      if (sub[i-1] == arr[j-1]){
        lcs[i][j] = [1 + lcs[i-1][j-1][0],lcs[i][j-1][1]];
       
        if (lcs[i][j][0] == i){
                  // [arr index, left node count above]
          nodes[i].push([j - 1,lcs[i-1][j-1][1]]);
       
          lcs[i][j][1] += 1;
        }
       
      } else {
        lcs[i][j] = [Math.max(lcs[i-1][j][0],lcs[i][j-1][0]),lcs[i][j-1][1]];
      }
    }
  }
   
  function enumerate(node,i,accum){
    if (i == 0){
      res.push(remove(arr,new Set(accum)));
      return;
    }
    
    for (var j=0; j<node[1]; j++){
      var _accum = accum.slice();
      _accum.push(nodes[i][j][0]);
      
      enumerate(nodes[i][j],i - 1,_accum);
    }
  }
  
  nodes[sub.length].forEach(function(v,i){ 
    enumerate(nodes[sub.length][i],sub.length - 1,[nodes[sub.length][i][0]]); 
  });

  return res;
}

console.log(JSON.stringify(f([1,2,1,3,1,4,4], [1,4,4])));
console.log(JSON.stringify(f([8,6,4,4], [6,4,8])));
console.log(JSON.stringify(f([1,1,2], [1])));
console.log(JSON.stringify(f(['a','g','b','a','b','c','c'], ['a','b','c'])));
Incogitant answered 22/8, 2016 at 1:25 Comment(2)
The problem does boil down to a variant of LCS. This code as posted isn't the most readable, but it does seem to be faster than the other solutions according to my unscientific microbenchmarking.Quaint
@Quaint Thanks for checking it out! Basically, the code is following the algorithm description above so the LCS table needs an extra field for how many nodes are to the left for each cell (nodes are the matches that are also part of a longest common subsequence). There may be ways to optimize depending on the data and use. I also wonder, since we know we are only looking for complete matches, if there may be some kind of an order of magnitude optimization.Sass
E
7

You can use recursion. Build a new subsequence C by walking through A and pushing elements in order. Whenever you encounter an element that matches the head of B, you will fork the recursion into two paths: one in which you remove (i.e. skip) the element from A and B, and another in which you ignore it and continue business as usual.

If you exhaust all of B (meaning that you "removed" all elements in B from A), then appending the rest of A to C will produce a valid subsequence. Otherwise, if you reach the end of A without exhausting all of B, C is not a valid subsequence and should be discarded.

function removeSubSeq(a, b) {
    function* remove(i, j, c) {
        if (j >= b.length) {
            yield c.concat(a.slice(i));
        } else if (i >= a.length) {
            return;
        } else if (a[i] === b[j]) {
            yield* remove(i + 1, j + 1, c);
            yield* remove(i + 1, j, c.concat(a.slice(i, i + 1)));
        } else {
            yield* remove(i + 1, j, c.concat(a.slice(i, i + 1)));
        }
    }

    if (a.length < b.length) {
        return [];   
    }

    return Array.from(remove(0, 0, []));
}

The inner helper function can be made slightly more efficient by replacing the use of Array.concat in each recursive branch with a simple push()/pop() pair, although, this makes the control flow a little harder to grok.

function* remove(i, j, c) {
    if (j >= b.length) {
        yield c.concat(a.slice(i));
    } else if (i >= a.length) {
        return;
    } else {
        if (a[i] === b[j]) {
            yield* remove(i + 1, j + 1, c);
        }

        c.push(a[i]);
        yield* remove(i + 1, j, c);
        c.pop();
    }
}
Exemplar answered 21/8, 2016 at 5:38 Comment(1)
I like the elegance of this, and it is a good usage of generator functions. Nice!Quaint
E
6

This problem can be solved using the bottom-up Dynamic Programming approach with backtracking.

Let's consider a recurrence relation f(i1, i2), which helps to check whether the tail of the sequence arr2 can be removed from the tail of the sequence arr1:

f(i1, i2) = true, if(i1 == length(arr1) AND i2 == length(arr2))
f(i1, i2) = f(i1 + 1, i2) OR f(i1 + 1, i2 + 1), if(arr1[i1] == arr2[i2])
f(i1, i2) = f(i1 + 1, i2), if(arr1[i1] != arr2[i2])

solution  = f(0, 0)

I use the term tail to denote the subsequence of arr1 which starts at the index i1 and spans to the end of arr1 (and the same for arr2 - tail of arr2 starts at the index i2 and spans to the end of arr2).

Let's start with top-down implementation of the given recurrence relation (yet without memoization, in order to keep the explanation simple). Below is the snippet of Java code, which prints all possible subsequences of arr1 after removal of arr2:

void remove(int[] arr1, int[] arr2) {
    boolean canBeRemoved = remove(arr1, arr2, 0, 0, new Stack<>());
    System.out.println(canBeRemoved);
}

boolean remove(int[] arr1, int[] arr2, int i1, int i2, Stack<Integer> stack) {

    if (i1 == arr1.length) {
        if (i2 == arr2.length) {
            // print yet another version of arr1, after removal of arr2
            System.out.println(stack);
            return true;
        }
        return false;
    }

    boolean canBeRemoved = false;
    if ((i2 < arr2.length) && (arr1[i1] == arr2[i2])) {
        // current item can be removed
        canBeRemoved |= remove(arr1, arr2, i1 + 1, i2 + 1, stack);
    }

    stack.push(arr1[i1]);
    canBeRemoved |= remove(arr1, arr2, i1 + 1, i2, stack);
    stack.pop();

    return canBeRemoved;
}

Th provided snippet of code does not use any memoization technique, and has the exponential runtime complexity for all instances of given problem.

However, we can see that the variable i1 can only have the value from the interval [0..length(arr1)], also the variable i2 can only have the value from the interval [0..length(arr2)].

Hence, it is possible to check, whether arr2 can be removed from arr1 with polynomial runtime complexity: O(length(arr1) * length(arr2)).

On the other hand, even if we find with polynomial runtime complexity that arr2 can be removed from arr1 - there still might be an exponential amount of possible ways to remove arr2 from arr1.

For example, consider the instance of the problem: when it is needed to remove arr2 = [1,1,1] from arr1 = [1,1,1,1,1,1,1]. There are 7!/(3! * 4!) = 35 ways to do it.

Nevertheless, below is the bottom-up Dynamic Programming solution with backtracking, which is still for many instances of given problem will have better runtime complexity than exponential:

void remove_bottom_up(int[] arr1, int[] arr2) {
    boolean[][] memoized = calculate_memoization_table(arr1, arr2);
    backtrack(arr1, arr2, 0, 0, memoized, new Stack<>());
}

/**
 * Has a polynomial runtime complexity: O(length(arr1) * length(arr2))
 */
boolean[][] calculate_memoization_table(int[] arr1, int[] arr2) {

    boolean[][] memoized = new boolean[arr1.length + 1][arr2.length + 1];
    memoized[arr1.length][arr2.length] = true;

    for (int i1 = arr1.length - 1; i1 >= 0; i1--) {
        for (int i2 = arr2.length; i2 >= 0; i2--) {

            if ((i2 < arr2.length) && (arr1[i1] == arr2[i2])) {
                memoized[i1][i2] = memoized[i1 + 1][i2 + 1];
            }
            memoized[i1][i2] |= memoized[i1 + 1][i2];
        }
    }
    return memoized;
}

/**
 * Might have exponential runtime complexity.
 *
 * E.g. consider the instance of the problem, when it is needed to remove
 * arr2 = [1,1,1] from arr1 = [1,1,1,1,1,1,1].
 *
 * There are 7!/(3! * 4!) = 35 ways to do it.
 */
void backtrack(int[] arr1, int[] arr2, int i1, int i2, boolean[][] memoized, Stack<Integer> stack) {

    if (!memoized[i1][i2]) {
        // arr2 can't be removed from arr1
        return;
    }

    if (i1 == arr1.length) {
        // at this point, instead of printing the variant of arr1 after removing of arr2
        // we can just collect this variant into some other container
        // e.g. allVariants.add(stack.clone())
        System.out.println(stack);
        return;
    }

    if ((i2 < arr2.length) && (arr1[i1] == arr2[i2])) {
        backtrack(arr1, arr2, i1 + 1, i2 + 1, memoized, stack);
    }

    stack.push(arr1[i1]);
    backtrack(arr1, arr2, i1 + 1, i2, memoized, stack);
    stack.pop();
}

JavaScript Implementation of described solution

function remove_bottom_up(base_arr, removed_arr) {

    // Initialize memoization table
    var memoized = new Array(base_arr.length + 1);
    for (var i = 0; i < base_arr.length + 1; i++) {
        memoized[i] = new Array(removed_arr.length + 1);
    }
    memoized[base_arr.length][removed_arr.length] = true;

    // Calculate memoization table 
    for (var i1 = base_arr.length - 1; i1 >= 0; i1--) {
        for (var i2 = removed_arr.length; i2 >= 0; i2--) {
            if ((i2 < removed_arr.length) && (base_arr[i1] == removed_arr[i2])) {
                memoized[i1][i2] = memoized[i1 + 1][i2 + 1];
            }
            memoized[i1][i2] |= memoized[i1 + 1][i2];
        }
    }

    // Collect all variants
    var all_variants = [];
    backtrack(base_arr, removed_arr, 0, 0, memoized, [], all_variants);
    return all_variants;
}

function backtrack(base_arr, removed_arr, i1, i2, memoized, stack, all_variants) {

    if (!memoized[i1][i2]) {
        // arr2 can't be removed from arr1
        return;
    }

    if (i1 == base_arr.length) {
        all_variants.push(stack.slice(0));
        return;
    }

    if ((i2 < removed_arr.length) && (base_arr[i1] == removed_arr[i2])) {
        backtrack(base_arr, removed_arr, i1 + 1, i2 + 1, memoized, stack, all_variants);
    }

    stack.push(base_arr[i1]);
    backtrack(base_arr, removed_arr, i1 + 1, i2, memoized, stack, all_variants);
    stack.pop();
}

console.log(JSON.stringify(remove_bottom_up([1, 2, 1, 3, 1, 4, 4], [1, 4, 4])));
console.log(JSON.stringify(remove_bottom_up([8, 6, 4, 4], [6, 4, 8])));
console.log(JSON.stringify(remove_bottom_up([1, 1, 2], [1])));
console.log(JSON.stringify(remove_bottom_up([1, 1, 1, 1, 1, 1, 1], [1, 1, 1])));
Euphoria answered 21/8, 2016 at 13:42 Comment(0)
C
3

The algorithm:

  1. Recursively build a tree of nodes, starting from the first element in B. Each node's value is the index of the subsequence item matching its level and its descendants are the indices of the next item -- so for [1,2,1,3,1,4,4], [1,4,4] the tree would be [ [ 0, [5, [6]], [6] ], [ 2, [5, [6]], [6] ], [ 4, [5, [6]], [6] ].
  2. Walk this tree and build up subsequences of items to delete, i.e. find all paths in the tree that are as long as the subsequence. This would result in a list like [ [ 0, 5, 6 ], [ 2, 5, 6 ], [ 4, 5, 6 ] ].
  3. For each list thus developed, append the list that results from the elements at those indices being deleted: [ [ 2, 1, 3, 1 ], [ 1, 2, 3, 1 ], [ 1, 2, 1, 3 ] ].

The code to do this, which matches all your test cases:


#!/usr/bin/env node

var _findSubSeqs = function(outer, inner, current) {

    var results = [];
    for (var oi = current; oi < outer.length; oi++) {
        if (outer[oi] == inner[0]) {
            var node = {
                value: oi,
                children: _findSubSeqs(outer, inner.slice(1), oi+1)
            };
            results.push(node);
            }
    }
    return results;
}

var findSubSeqs = function(outer, inner) {
    var results = _findSubSeqs(outer, inner, 0);
    return walkTree(results).filter(function(a) {return (a.length == inner.length)});
}

var _walkTree = function(node) {
    var results = [];
    if (node.children.length) {
        for (var n = 0; n < node.children.length; n++) {
            var res = _walkTree(node.children[n])
            for (r of res) {
                results.push([node.value].concat(r))
            }
        }
    } else {
        return [[node.value]]
    }
    return results
}

var walkTree = function(nds) {
    var results = [];
    for (var i = 0; i < nds.length; i++) {
        results = results.concat(_walkTree(nds[i]))
    }
    return results
}

var removeSubSeq = function(outer, inner) {
    var res = findSubSeqs(outer, inner);
    var subs = [];
    for (r of res) {
        var s = [];
        var k = 0;
        for (var i = 0; i < outer.length; i++) {
            if (i == r[k]) {
                k++;
            } else {
                s.push(outer[i]);
            }
        }
        subs.push(s);
    }
    return subs
}

console.log(removeSubSeq([1,2,1,3,1,4,4], [1,4,4]))
console.log(removeSubSeq([8,6,4,4], [6,4,8]) )
console.log(removeSubSeq([1,1,2], [1]))
Chazan answered 21/8, 2016 at 6:41 Comment(0)
G
2

First I would use string. It's easier to manipulate:

var results = [];

function permute(arr) {
    var cur, memo = [];

    for (var i = 0; i < arr.length; i++) {
        cur = arr.splice(i, 1);
        if (arr.length === 0) {
            results.push(memo.concat(cur));
        }
        permute(arr.slice(), memo.concat(cur));
        arr.splice(i, 0, cur[0]);
    }
    return results;
}

function removeSub(arr, sub) {
    strArray = arr.join(' ');
    if(strArray.includes(sub)){
        return strArray.replace(sub.join(' ')).split(' ');
    }
    return [];
}

function removeSubSeq(arr, sub) {
    return permute(removeSub(arr, sub));
}

I have not commented the code, but do not hesitate to ask for clarification. It's not tested, but the idea is in it...

Grodno answered 21/8, 2016 at 3:59 Comment(1)
Missing closing ) at return permute(removeSub(arr, sub)Explant
P
1

My aim was to create and call functions as little as possible. This seems to work. Could definitely be cleaned up. Something to play around with...

function removeSubSeq( seq, sub ) {

    var arr,
        sub_v,
        sub_i = 0,
        seq_i,
        sub_len = sub.length,
        sub_lenm1 = sub_len - 1,
        seq_len = seq.length,
        pos = {},
        pos_len = [],
        c_pos,
        map_i = [],
        len,
        r_pos,
        sols = [],
        sol;

    do {

        map_i[ sub_i ] = 0;
        sub_v = sub[ sub_i ];

        if( pos[ sub_v ] ) {

            pos_len[ sub_i ] = pos_len[ sub_i - 1 ];
            continue;
        
        }

        arr = pos[ sub_v ] = [];

        c_pos = 0;

        seq_i = seq_len;

        while( seq_i-- ) {

            if( seq[ seq_i ] === sub_v ) {
                
                arr[ c_pos++ ] = seq_i;

            }

        }

        pos_len[ sub_i ] = arr.length;

    } while( ++sub_i < sub_len );

    len = pos[ sub[ 0 ] ].length;

    while( map_i[ 0 ] < len ) {

        sub_i = 0;
        arr = [];

        do {

            r_pos = pos[ sub[ sub_i ] ][ map_i[ sub_i ] ];
            
            if( sub_i && r_pos <= arr[ sub_i - 1] ) break;
            
            arr.push( r_pos );
        
        } while( ++sub_i < sub_len );

        if( sub_i === sub_len ) {
            
            sol = seq.slice( 0 );

            while( sub_i-- ) sol.splice( arr[ sub_i ], 1 );

            sols.push( sol );
        
        }

        sub_i = sub_lenm1;

        while( ++map_i[ sub_i ] === pos_len[ sub_i ] ) {
            if( sub_i === 0 ) break;
            map_i[ sub_i-- ] = 0;
        }

    } while( map_i[ 0 ] < len );

    return sols;

}

console.log(JSON.stringify(removeSubSeq([1,2,1,3,1,4,4], [1,4,4])));
console.log(JSON.stringify(removeSubSeq([8,6,4,4], [6,4,8])));
console.log(JSON.stringify(removeSubSeq([1,1,2], [1])));
console.log(JSON.stringify(removeSubSeq(['a','g','b','a','b','c','c'], ['a','b','c'])));
Parody answered 29/8, 2016 at 14:22 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.