How to generate a rolling std line chart in dc.js/reductio/crossfilter

I want to show a line graph with rolling std over the sum of values for an interval of dates.

The code that generates the crossfilter/reductio objects is:

var myCrossfilter = crossfilter(data);

function getRunningDates(numDays) {
    return function getDates(d) {
        // Work on a copy so the record's own date is never mutated
        var s = new Date(d.ValueDate);
        var e = new Date(s);
        e.setDate(e.getDate() + numDays);
        var a = [];
        // Tag the record with every date in [s, s + numDays), so it
        // falls into its own day's bin and the following days' bins
        while (s < e) {
            a.push(new Date(s)); // push a copy, then advance s
            s.setDate(s.getDate() + 1);
        }
        return a;
    };
}

var dim1 = myCrossfilter.dimension(getRunningDates(20), true); // tag (array) dimension
var dim2 = myCrossfilter.dimension(dc.pluck("ValueDate"));
var group1 = dim1.group();
var group2 = dim2.group();
var reducerRolling = reductio()
    .std("value");
reducerRolling(group1);
var reducer = reductio()
    .sum("value");
reducer(group2);
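
To make the tag dimension concrete: with numDays = 3, a record dated April 1st is tagged with its own date plus the two following days, so it contributes to three bins. A quick illustration, using a hypothetical record:

// Hypothetical record, just to show what the tag dimension sees:
var tags = getRunningDates(3)({ ValueDate: new Date(2018, 3, 1), value: 5 });
console.log(tags.map(function(d) { return d.toDateString(); }));
// -> ["Sun Apr 01 2018", "Mon Apr 02 2018", "Tue Apr 03 2018"]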

I have put everything into a jsFiddle to show what I mean. (Unrelated question: I do not understand how the dates on the graphs can go beyond the dateToInit variable defined in the fiddle.)

I would like the bottom graph to be a rolling std of the values in the top graph. What ends up happening instead is that the std calculation in the bottom graph does not do the sum aggregation first (which makes sense; I understand why that happens).

Is there a way to use a group as the dimension for another group? If not, how would one achieve what I am trying to do?

Asked by Nativity on 11/4/2018 at 9:16 · Comments (7):
So, if I understand correctly, what you're showing is a rolling sum and a rolling standard deviation. You want to show a rolling standard deviation of the sum? The standard deviation of the mean can be derived from the standard deviation of the individual values by just dividing by Math.sqrt(d.count) (the square root of the sample size). I'm not sure about getting to the standard deviation of the sum, but I'm sure it's derivable. It should be proportional to the standard deviation of the mean, I would think. – Infinity
My bad, I didn't define my first dimension properly. I have updated the fiddle and my question. – Nativity
So for date t I want to show the standard deviation of the [t-20, t] interval, where the values in the interval are the sums of the values for each day (subject to filtering etc.). – Nativity
Ah, I see. And it's not taking the sum of squares of the 20 sums but rather of the individual values in each day (across all 20 days). I'll think about this tonight, but it's kind of making my brain hurt :-) No promises, but hopefully we'll work something out. – Infinity
Yes, so it's basically showing me the standard deviation of Math.random(), which is nice I guess but not that useful for my purposes :) – Nativity
I've been ignoring this because I don't know the reductio way to do this. But hey, a bounty gets my attention! When I hear "group on another group" I think "fake group". This is a lot like accumulate but more complex, since you'd push/pop the data in an array, and then calculate avg/stddev based on the current array. If reductio doesn't have something like this, I'd be happy to try the fake group way. – Deflect
I tried the fake group way but got lost on how to account for the array structure. Am happy for you to give it a try. I think it would have some value for everyone if it could easily be generalized to any function. – Nativity

OK so I've come up with a solution based on the 'fake group' approach suggested by Gordon.

I have updated the jsFiddle with a working version.

The gist of it is to define custom reducing functions:

// p is the group's running value, v is the record being added/removed
var reduceAddRunning = function(p, v) {
    if (!p.datesData.hasOwnProperty(v.ValueDate)) {
        p.datesData[v.ValueDate] = 0;
    }
    p.datesData[v.ValueDate] += +v.value; // per-day sum inside the window
    p.value += +v.value;                  // total over the window
    return p;
};
var reduceRemoveRunning = function(p, v) {
    p.datesData[v.ValueDate] -= +v.value;
    p.value -= +v.value;
    return p;
};
var reduceInitRunning = function() {
    return {
        value: 0,
        datesData: {}
    };
};

and then build a fake group on top of it:

var running_group = function(source_group, theRunningFn) {
    return {
        all: function() {
            return source_group.all().map(function(d) {
                // Collect the per-day sums stored by the reducer...
                var arr = [];
                for (var date in d.value.datesData) {
                    if (d.value.datesData.hasOwnProperty(date)) {
                        arr.push(d.value.datesData[date]);
                    }
                }
                // ...and reduce them with the supplied function
                return { key: d.key, value: theRunningFn(arr) };
            });
        }
    };
};

with theRunningFn being math.std in my case.
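
For context, this is roughly how the fake group plugs into the bottom chart. Everything here is illustrative: bottomChart, "#bottom-chart", minDate and maxDate are assumed names, not taken from the fiddle, and d3 v3 would use d3.time.scale() instead of d3.scaleTime():

// Illustrative wiring: the fake group drops in wherever a real
// crossfilter group would go.
var bottomChart = dc.lineChart("#bottom-chart");
bottomChart
    .dimension(dim1)
    .group(running_group(group1, math.std)) // math.std from math.js
    .x(d3.scaleTime().domain([minDate, maxDate]));
dc.renderAll();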

I am still left with two issues, which will probably be the basis for a new question:

  • This is quite slow. Happy to hear suggestions to speed it up. (My graph updates used to be snappy; they are now slowish. Still usable, but slow.)
  • I do not know how to handle the edge cases. The values shown at the beginning of the time series do not make sense, as they are based on less history than the full window. The same issue applies when I filter the data by dates.

EDIT: the following is a better solution, based on Gordon's comment (again!).

Just do a regular sum group and apply the following fake group function:

var running_group_2 = function(source_group, numDays, theRunningFn) {
    return {
        all: function() {
            var source_arr = source_group.all();
            var keys = source_arr.map(function(d) { return d.key; });
            var values = source_arr.map(function(d) { return d.value; });
            var output_arr = [];

            // Start at numDays so bins without a full window of
            // history are simply not shown
            for (var i = numDays; i < source_arr.length; i++) {
                output_arr.push({
                    key: keys[i],
                    value: theRunningFn(values.slice(i - numDays, i))
                });
            }
            return output_arr;
        }
    };
};

It solves both the speed issue (it is much less cumbersome: it no longer stores all the daily values, and instead works on the already aggregated values) and the edge cases (even if the edge-case handling is not easily generalizable beyond my case: I just don't show a value when I don't have enough points to calculate the running variable).

Here is the jsFiddle for that second (better for my purposes) solution.
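
Usage then looks roughly like this (a sketch; bottomChart stands in for whatever dc chart the fiddle actually uses):

// Wrap the plain sum group from the question; math.std is the math.js
// standard deviation used as theRunningFn:
var rollingStd = running_group_2(group2, 20, math.std);
bottomChart.group(rollingStd);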

Answered by Nativity on 17/4/2018 at 3:15 · Comments (3):
I don't think you need a special reduction, since you just want to use the sum for each time interval. The reduction is the same; it's just that you want to calculate a rolling avg/stddev over the last 20 bins. So it should be a lot faster and simpler to leave the reduction alone and use a fake group to pass over the data once and do the rolling average. I don't have time to try this right now, but I hope to show what I mean in another answer. I'm also not sure what to do about the edge cases, except perhaps count those as fewer samples (divide by less than 20). – Deflect
Ah, I see. Let me give it a try then. – Nativity
Nice. Could be optimized even further, to copy less, but the complexity is now O(n) instead of O(n²), so it's pretty much optimal. – Deflect
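
For reference, a sketch of what that further optimization could look like: keep running sums of the window's values and their squares so each bin's standard deviation costs O(1) instead of re-slicing the array. This is illustrative code, not from the thread; it reproduces the window of running_group_2 (the previous numDays bins, excluding the current one) and uses the sample (n - 1) normalization that math.std uses by default.

// Sketch: O(n) rolling standard deviation over the last numDays bins.
var running_std_group = function(source_group, numDays) {
    return {
        all: function() {
            var source_arr = source_group.all();
            var output_arr = [];
            var sum = 0;   // sum of the values currently in the window
            var sumSq = 0; // sum of their squares
            for (var i = 0; i < source_arr.length; i++) {
                if (i >= numDays) {
                    // Window is full: emit the sample std for this bin
                    var variance =
                        (sumSq - sum * sum / numDays) / (numDays - 1);
                    output_arr.push({
                        key: source_arr[i].key,
                        // Guard against tiny negative values caused by
                        // floating-point cancellation
                        value: Math.sqrt(Math.max(variance, 0))
                    });
                    // Slide the window: drop the oldest value
                    var old = source_arr[i - numDays].value;
                    sum -= old;
                    sumSq -= old * old;
                }
                // Add the current value to the window
                var v = source_arr[i].value;
                sum += v;
                sumSq += v * v;
            }
            return output_arr;
        }
    };
};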
