dc.js create dataTable with min max avg values for three columns

I'm attempting to create a d3/dc/crossfilter dataTable with the min, max, and average values for three of the columns in the sample data. I've been struggling for hours, but I can't understand how to integrate the reduceAdd, reduceRemove, and reduceInitial functions into the dataTable to create the three necessary rows.

Desired output will look something like this:

------------------------------------------
|  Value   |  Cars  |  Bikes  |  Trucks  |
------------------------------------------
|    Min   |   125  |   310   |    189   |
------------------------------------------
|    Max   |   230  |   445   |    290   |
------------------------------------------
|    Avg   |   178  |   385   |    245   |
------------------------------------------

I also can't see how to add the first (label) column. I know reduceInitial can return an array (e.g. ['min', 'max', 'avg']), but how do I reference the labels from it?

var myCSV = [	
{"shift":"1","date":"01/01/2016/08/00/00","car":"178","truck":"255","bike":"317","moto":"237"},
{"shift":"2","date":"01/01/2016/17/00/00","car":"125","truck":"189","bike":"445","moto":"273"},
{"shift":"3","date":"02/01/2016/08/00/00","car":"140","truck":"219","bike":"328","moto":"412"},
{"shift":"4","date":"02/01/2016/17/00/00","car":"222","truck":"290","bike":"432","moto":"378"},
{"shift":"5","date":"03/01/2016/08/00/00","car":"200","truck":"250","bike":"420","moto":"319"},
{"shift":"6","date":"03/01/2016/17/00/00","car":"230","truck":"220","bike":"310","moto":"413"},
{"shift":"7","date":"04/01/2016/08/00/00","car":"155","truck":"177","bike":"377","moto":"180"},
{"shift":"8","date":"04/01/2016/17/00/00","car":"179","truck":"203","bike":"405","moto":"222"},
{"shift":"9","date":"05/01/2016/08/00/00","car":"208","truck":"185","bike":"360","moto":"195"},
{"shift":"10","date":"05/01/2016/17/00/00","car":"150","truck":"290","bike":"315","moto":"280"},
{"shift":"11","date":"06/01/2016/08/00/00","car":"200","truck":"220","bike":"350","moto":"205"},
{"shift":"12","date":"06/01/2016/17/00/00","car":"230","truck":"170","bike":"390","moto":"400"},
];


dataTable = dc.dataTable('#dataTable');
lc1 = dc.lineChart("#line1");
lc2 = dc.lineChart("#line2");
lc3 = dc.lineChart("#line3");

var dateFormat = d3.time.format("%d/%m/%Y/%H/%M/%S");

myCSV.forEach(function (d) {
	d.date = dateFormat.parse(d.date);
});

myCSV.forEach(function (d) {
	d['car'] = +d['car'];
	d['bike'] = +d['bike'];
	d['moto'] = +d['moto'];
});

//console.log(myCSV);

var facts = crossfilter(myCSV);
var dateDim = facts.dimension(function (d) {return d.date});

var carDim = facts.dimension(function (d) {return d['car']});
var dgCar = dateDim.group().reduceSum(function (d) {return d['car']});

var bikeDim = facts.dimension(function (d) {return d['bike']});
var dgBike = dateDim.group().reduceSum(function (d) {return d['bike']});

var motoDim = facts.dimension(function (d) {return d['moto']});
var dgMoto = dateDim.group().reduceSum(function (d) {return d['moto']});

var minDate = new Date ("2016-01-01T08:00:00.000Z");
var maxDate = new Date ("2016-01-03T17:00:00.000Z");	

var maxY = d3.max(myCSV, function(d) {return d['car']});

function reduceAdd(i,d){ return i+1; }
function reduceRemove(i,d){return i-1; }
function reduceInitial(){ return ['min','max','avg'];}


dataTable
  .width(jsTablWidth)
  .height(400)
  .dimension(dateDim)
  .group( function(d){return '';} )
  .columns([
    {
      label: 'Value',
      format: function(d) { return dateGroup1.reduce(reduceAdd,reduceRemove,reduceInitial); }
    },
    {
      label: tSel1.replace(/_/g, " "),
      format: function(d) { return /* avg cars */; }
    },
    {
      label: tSel2.replace(/_/g, " "),
      format: function(d) { return /* avg bikes */; }
    },
    {
      label: tSel3.replace(/_/g, " "),
      format: function(d) { return /* avg moto */; }
    }
  ]);


dc.renderAll();
dc.redrawAll();
svg{height:280px;}
<script src="http://cdnjs.cloudflare.com/ajax/libs/d3/3.3.3/d3.min.js"></script>
<script src="http://cdnjs.cloudflare.com/ajax/libs/crossfilter/1.3.1/crossfilter.min.js"></script>
<script src="http://dc-js.github.io/dc.js/js/dc.js"></script>
<link href="http://dc-js.github.io/dc.js/css/dc.css" rel="stylesheet"/>

<svg id="dataTable"></svg>
<svg id="line1"></svg>
<svg id="line2"></svg>
<svg id="line3"></svg>
Perspiratory answered 27/2, 2017 at 0:4 Comment(3)
This is actually not trivial - not only is that a different kind of aggregation from what crossfilter is designed for, but the data table is intended to show rows of raw data. It's not too hard to get it to show aggregations, but then you're also asking to show different aggregations in each row. It's an interesting challenge but I'm not going to take it up today. You can start with this example to learn how to do min/max aggregations.Pizzicato
And this example shows how to use a group with a table to show aggregated data. You'd still need to come up with a group that has three different aggregations in it - I think you can do this with fake groupsPizzicato
@gibberish, you're very kind to throw some bounty points to SergGr. That's very civic of you. Please remember to actually award the bounty by clicking the bounty next to the answer you intended, because it's not automatic in the case where the answers have already been written.Pizzicato

Okay, hope you're okay with transposing the table across the diagonal, putting the modes of transportation as rows instead of columns. This solution is already pretty wacky without figuring that part out.

[screenshot: sample output]

There's really no way to calculate the min and max except to keep track of all the values. So we're going to use the reductions from the complex reductions example. These actually don't reduce at all, but maintain a sorted array of the filtered rows.

We need a unique key in order to maintain the sorted array (so that we remove the correct row). Luckily you have that in the shift field.

So here are those functions, or rather functions that generate reducers given a unique key accessor.

  function groupArrayAdd(keyfn) {
      // generates a reducer that inserts the incoming row into the array,
      // keeping it sorted by the unique key
      var bisect = d3.bisector(keyfn);
      return function(elements, item) {
          var pos = bisect.right(elements, keyfn(item));
          elements.splice(pos, 0, item);
          return elements;
      };
  }
  function groupArrayRemove(keyfn) {
      // generates a reducer that locates a row by its unique key
      // and removes it from the sorted array
      var bisect = d3.bisector(keyfn);
      return function(elements, item) {
          var pos = bisect.left(elements, keyfn(item));
          if(keyfn(elements[pos])===keyfn(item))
              elements.splice(pos, 1);
          return elements;
      };
  }
  function groupArrayInit() {
      // the "reduction" starts as an empty array of rows
      return [];
  }

Since these keep references to the entire rows, we only need one group; we'll use more specific accessors when we calculate the metrics below.

Here we want crossfilter.groupAll, which reduces everything to one bin. This is because the rows are not partitioned by any key; every row contributes to all modes of transport:

var filteredRows = facts.groupAll().reduce(
  groupArrayAdd(dc.pluck('shift')),
  groupArrayRemove(dc.pluck('shift')),
  groupArrayInit
);

Now comes the most absurd part. We're going to create the fakest dimension object you ever saw. The important thing is that it's an object with a .bottom() method which dynamically calculates each of the rows:

var fakeDim = {
  bottom: function() {
    return [
      {key: 'Car', value: filteredRows.value(), acc: dc.pluck('car')},
      {key: 'Truck', value: filteredRows.value(), acc: dc.pluck('truck')},
      {key: 'Bike', value: filteredRows.value(), acc: dc.pluck('bike')},
      {key: 'Moto', value: filteredRows.value(), acc: dc.pluck('moto')}
    ];
  }
};

Except, wait, that doesn't look like it's doing any calculation at all, just fetching values? And what's that weird acc?

Well, we're producing exactly the source data we need to build the table rows, and we'll use the format accessors below to actually calculate everything. We'll use the key for the "label column", keep the raw rows in the value member, and supply an accessor acc for computing the metrics.

The data table definition looks like this:

dataTable
  .width(400)
  .height(400)
  .dimension(fakeDim)
  .group( function(d){return '';} )
  .columns([
    {
      label: 'Value',
      format: function(d) { 
        return d.key;
      }
    },
    {
      label: 'Min',
      format: function(d) {
        return d3.min(d.value, d.acc);
      }
    },
    {
      label: 'Max',
      format: function(d) {
        return d3.max(d.value, d.acc);
      }
    },
    {
      label: 'Avg',
      format: function(d) {
        return d3.mean(d.value, d.acc);
      }
    }
  ]);

Here's where all the metrics are finally calculated. We'll have all the rows available, and we have an accessor for each table row. d3-array has handy functions for computing the min, max, and average of an array. Boom, done.
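For instance, here's how the Cars row evaluates with no filters active, using the sample data above (a minimal illustration; carRow is the first object returned by fakeDim.bottom()):

var carRow = {key: 'Car', value: filteredRows.value(), acc: dc.pluck('car')};
d3.min(carRow.value, carRow.acc);  // 125
d3.max(carRow.value, carRow.acc);  // 230
d3.mean(carRow.value, carRow.acc); // 184.75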

I threw a stacked chart into this fiddle for testing. (I know stacking these values probably makes no sense, it just helps to be able to filter.)

http://jsfiddle.net/gordonwoodhull/g4xqvgvL/21/

Rotating the dataTable

The extra bounty on this reminded me that I never solved the table transposition problem, so I thought I'd take a look, because it's fun. I still think the bounty should go to @SergGr, but here is a solution to transpose the table, based on the categories, the dimension, and the column accessors/formatters.

First, we're going to need the list of categories, so let's structure the categories and field names a little better:

var categories = {  
  Car: 'car',
  Truck: 'truck',
  Bike: 'bike',
  Moto: 'moto'
};

Now the fake dimension can be simplified, because it's generated from this category map:

function fake_dimension(cats) {
  return {
    bottom: function() {
      return Object.keys(cats).map(function(k) {
        return {
          key: k,
          value: filteredRows.value(),
          acc: dc.pluck(cats[k])
        };
      });
    }
  };
}
var fakeDim = fake_dimension(categories);

We need to pull the column definitions out of the chart definition, because we're going to transform them:

var columns = [
  {
    label: 'Value',
    format: function(d) { 
      return d.key;
    }
  },
  {
    label: 'Min',
    format: function(d) {
      return d3.min(d.value, d.acc);
    }
  },
  {
    label: 'Max',
    format: function(d) {
      return d3.max(d.value, d.acc);
    }
  },
  {
    label: 'Avg',
    format: function(d) {
      return d3.mean(d.value, d.acc);
    }
  }
];

Finally, we can write the transposition function:

function transpose_datatable(cats, dim, cols) {
  var cols2 = d3.map(cols, function(col) { // 1
    return col.label;
  });
  return {
    dim: { // 2
      bottom: function() {
        var dall = d3.map(dim.bottom(Infinity), function(row) { // 3
          return row.key;
        });
        return cols.slice(1).map(function(col) { // 4
          var row = {
            label: col.label
          };
          Object.keys(cats).forEach(function(k) {
            row[k] = dall.get(k);
          });
          return row;
        });
      }
    },
    cols: [ // 5
      {
        label: cols[0].label,
        format: function(d) {
          return d.label;
        }
      }
    ].concat(Object.keys(cats).map(function(k) { // 6
      return {
        label: k,
        format: function(d) {
          return cols2.get(d.label).format(d[k]);
        }
      }
    }))
  };
}

var transposed = transpose_datatable(categories, fakeDim, columns);

Notes on the numbered comments:
  1. First, we're going to need a map of the original columns to their definitions, because these will become the rows. We can use a d3.map here, which acts like a well-behaved JavaScript object.
  2. We're going to create a new fake dimension, and a new array of column definitions. The fake dimension only has a .bottom() method, just like the one above.
  3. The definition of .bottom() will need all the original data, indexed by key (category name). So we'll throw that into a d3.map object as well.
  4. Now we can build the fake dimension data. The first column was just the titles (which will now be column headers), so we'll skip that. The data for the row will be the new title (former column label), and a field for each category. The fields are populated with the rows from the original dimension.
  5. The new column definitions: the first one replaces the label column, and the rest are generated from the category names.
  6. The label for each column is now the category name, and .format() calls the original column's format, fetching the data using the category name.
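
Wiring the transposed definitions into the chart is then just a matter of swapping them in (a sketch, reusing the transposed object and dataTable from above):

dataTable
  .dimension(transposed.dim)
  .group(function(d) { return ''; })
  .columns(transposed.cols);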

New screenshot:

[screenshot: rotated dataTable]

Pizzicato answered 3/3, 2017 at 1:13 Comment(3)
Hi Gordon, I am deeply grateful for this answer. It worked out of the box, and was straight-forward to integrate with my code. However, I took considerable time studying SergGr's exceptional answer above, and I apologize for keeping you hanging.Perspiratory
Awesome. Yeah @SergGr's answer is really clever - I had never thought about avoiding the fake dimensions and groups with these virtual crossfilters, definitely something to remember. May come in handy in the future.Pizzicato
@gordon - Thanks for the reminder the other day about not letting the bounty expire. I've been caught by that a couple times already. Apologies for not having had the few seconds to respond when I got your message. "Some days you're the dog..." BTW, your answer is excellent and fully deserved the bounty and checkmark. crashwap chose well (although I understand his sentiments for wanting to also reward SergGr's answer which came at the problem from a quite different direction).Inkhorn

This is another solution that produces results closer to the requested ones, although with much more code than Gordon's.

Intro

I agree with Gordon that there is no reasonable way to achieve what you want directly with crossfilter. Crossfilter is row-oriented and you want to produce multiple rows based on columns. So the only way is to introduce some "fake" step. And a "fake" step implicitly means that the result will not be updated when the original datasource is changed. I see no way to fix this, as crossfilter hides its implementation details (such as filterListeners, dataListeners, and removeDataListeners) well enough.

However, dc is implemented in such a way that by default all charts are redrawn after various events (because they are all in the same global group). Because of this, the "fake objects", if properly implemented, can be recalculated as well, based on the updated data.

Thus my code contains two implementations for min/max:

  • fast(er), but only safe if you don't do any additional filtering
  • slow(er), but safe in case you want additional filtering

Note that if you use the fast-but-unsafe implementation and do additional filtering, you'll get exceptions, and other features might break as well.

Code

All the code is available at https://jsfiddle.net/4kcu2ut1/1/. Let's separate it into logical blocks and see them one by one.

First come some helper methods and objects. Each Op object essentially contains the methods necessary to pass to reduce, plus an optional getOutput for cases where the accumulator holds more data than just the result, as with avgOp and the min/max "safe" ops.

var minOpFast = {
    add: function (acc, el) {
        return Math.min(acc, el);
    },
    remove: function (acc, el) {
        throw new Error("Not supported");
    },
    initial: function () {
        return Number.MAX_VALUE;
    }
};

var maxOpFast = {
    add: function (acc, el) {
        return Math.max(acc, el);
    },
    remove: function (acc, el) {
        throw new Error("Not supported");
    },
    initial: function () {
        return -Infinity; // the identity for max (Number.MIN_VALUE is the smallest positive number, not the most negative)
    }
};
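
To see why remove throws in these fast ops: once the current extreme value leaves the filter, a scalar accumulator has no record of the runner-up. A minimal illustration with minOpFast:

var acc = minOpFast.initial(); // the identity value
acc = minOpFast.add(acc, 125); // 125
acc = minOpFast.add(acc, 178); // still 125; the 178 is not retained
// minOpFast.remove(acc, 125) would need to produce 178, but that value is gone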


var binarySearch = function (arr, target) {
    var lo = 0;
    var hi = arr.length;
    while (lo < hi) {
        var mid = (lo + hi) >>> 1; // safe int division
        if (arr[mid] === target)
            return mid;
        else if (arr[mid] < target)
            lo = mid + 1;
        else
            hi = mid;
    }
    return lo;
};

var minOpSafe = {
    add: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 0, el);
        return acc;
    },
    remove: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 1);
        return acc;
    },
    initial: function () {
        return [];
    },
    getOutput: function (acc) {
        return acc[0];
    }
};

var maxOpSafe = {
    add: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 0, el);
        return acc;
    },
    remove: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 1);
        return acc;
    },
    initial: function () {
        return [];
    },
    getOutput: function (acc) {
        return acc[acc.length - 1];
    }
};

var avgOp = {
    add: function (acc, el) {
        acc.cnt += 1;
        acc.sum += el;
        acc.avg = acc.sum / acc.cnt;
        return acc;
    },
    remove: function (acc, el) {
        acc.cnt -= 1;
        acc.sum -= el;
        acc.avg = acc.sum / acc.cnt;
        return acc;
    },
    initial: function () {
        return {
            cnt: 0,
            sum: 0,
            avg: 0
        };
    },
    getOutput: function (acc) {
        return acc.avg;
    }
};
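
As a quick sanity check of the Op interface (illustrative only):

var a = avgOp.initial();  // {cnt: 0, sum: 0, avg: 0}
a = avgOp.add(a, 10);
a = avgOp.add(a, 20);
avgOp.getOutput(a);       // 15
a = avgOp.remove(a, 10);
avgOp.getOutput(a);       // 20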

Then we prepare the source data and specify the transformation we want. aggregates is the list of operations from the previous step, additionally decorated with a key under which to store temporary data in the compound accumulator (it just has to be unique) and a label to show in the output. srcKeys contains the names of the properties (all of which must be of the same shape) that will be processed by each operation from the aggregates list.

var myCSV = [
    {"shift": "1", "date": "01/01/2016/08/00/00", "car": "178", "truck": "255", "bike": "317", "moto": "237"},
    {"shift": "2", "date": "01/01/2016/17/00/00", "car": "125", "truck": "189", "bike": "445", "moto": "273"},
    {"shift": "3", "date": "02/01/2016/08/00/00", "car": "140", "truck": "219", "bike": "328", "moto": "412"},
    {"shift": "4", "date": "02/01/2016/17/00/00", "car": "222", "truck": "290", "bike": "432", "moto": "378"},
    {"shift": "5", "date": "03/01/2016/08/00/00", "car": "200", "truck": "250", "bike": "420", "moto": "319"},
    {"shift": "6", "date": "03/01/2016/17/00/00", "car": "230", "truck": "220", "bike": "310", "moto": "413"},
    {"shift": "7", "date": "04/01/2016/08/00/00", "car": "155", "truck": "177", "bike": "377", "moto": "180"},
    {"shift": "8", "date": "04/01/2016/17/00/00", "car": "179", "truck": "203", "bike": "405", "moto": "222"},
    {"shift": "9", "date": "05/01/2016/08/00/00", "car": "208", "truck": "185", "bike": "360", "moto": "195"},
    {"shift": "10", "date": "05/01/2016/17/00/00", "car": "150", "truck": "290", "bike": "315", "moto": "280"},
    {"shift": "11", "date": "06/01/2016/08/00/00", "car": "200", "truck": "220", "bike": "350", "moto": "205"},
    {"shift": "12", "date": "06/01/2016/17/00/00", "car": "230", "truck": "170", "bike": "390", "moto": "400"},
];

var dateFormat = d3.time.format("%d/%m/%Y/%H/%M/%S");

myCSV.forEach(function (d) {
    d.date = dateFormat.parse(d.date);
    d['car'] = +d['car'];
    d['bike'] = +d['bike'];
    d['moto'] = +d['moto'];
    d['truck'] = +d['truck'];
    d.shift = +d.shift;
});

//console.table(myCSV);

var aggregates = [
    // not compatible with additional filtering
    /*{
        key: 'min',
        label: 'Min',
        agg: minOpFast
    },*/
    {
        key: 'minSafe',
        label: 'Min Safe',
        agg: minOpSafe
    },
    // not compatible with additional filtering
    /*{
        key: 'max',
        label: 'Max',
        agg: maxOpFast
    },*/
    {
        key: 'maxSafe',
        label: 'Max Safe',
        agg: maxOpSafe
    },
    {
        key: 'avg',
        agg: avgOp,
        label: 'Average'
    }
];

var srcKeys = ['car', 'bike', 'moto', 'truck'];

And now to the magic. buildTransposedAggregatesDimension is what does all the heavy lifting here. Essentially it performs two steps:

  1. First, groupAll to get aggregated data for each combination in the cross product of all operations and all keys.

  2. Split the mega-object grouped into an array that can serve as the data source for another crossfilter.

Step #2 is where my "fake" is. It seems to me much less "fake" than in Gordon's solution, as it doesn't rely on any internal details of crossfilter or dc (see the bottom method in Gordon's solution).

The splitting at step #2 is also where the data is actually transposed to meet your requirements. Obviously, the code can easily be modified not to do this and to produce results in the same shape as Gordon's solution.

Note also that it is important that this additional step does no extra calculations and only transforms already-computed values into the appropriate format. This is crucial for updates after filtering to work, because that way a table bound to the result of buildTransposedAggregatesDimension is still effectively bound to the original crossfilter data source.
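
To make that concrete, the single record built by the groupAll below ends up shaped roughly like this (illustrative, for the safe ops and avg with the unfiltered sample data):

// acc[agg.key][columnKey]: one inner object per aggregate, one entry per source column
{
  minSafe: {car: [125, 140, /* ... sorted values */], bike: [/* ... */], moto: [/* ... */], truck: [/* ... */]},
  maxSafe: {car: [125, 140, /* ... */], bike: [/* ... */], moto: [/* ... */], truck: [/* ... */]},
  avg:     {car: {cnt: 12, sum: 2217, avg: 184.75}, bike: {/* ... */}, moto: {/* ... */}, truck: {/* ... */}}
}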

var buildTransposedAggregatesDimension = function (facts, keysList, aggsList) {
    // "grouped" is a single record with all aggregates for all keys computed
    var grouped = facts.groupAll()
            .reduce(
            function add(acc, el) {
                aggsList.forEach(function (agg) {
                    var innerAcc = acc[agg.key];
                    keysList.forEach(function (key) {
                        var v = el[key];
                        innerAcc[key] = agg.agg.add(innerAcc[key], v);
                    });
                    acc[agg.key] = innerAcc;
                });
                return acc;
            },
            function remove(acc, el) {
                aggsList.forEach(function (agg) {
                    var innerAcc = acc[agg.key];
                    keysList.forEach(function (key) {
                        var v = el[key];
                        innerAcc[key] = agg.agg.remove(innerAcc[key], v);
                    });
                    acc[agg.key] = innerAcc;
                });
                return acc;
            },
            function initial() {
                var acc = {};
                aggsList.forEach(function (agg) {
                    var innerAcc = {};
                    keysList.forEach(function (key) {
                        innerAcc[key] = agg.agg.initial();
                    });
                    acc[agg.key] = innerAcc;
                });
                return acc;
            }).value();

    // split grouped back to array with element for each aggregation function
    var groupedAsArr = [];
    aggsList.forEach(function (agg, index) {
        groupedAsArr.push({
            sortIndex: index, // preserve index in aggsList so we can sort by it later
            //agg: agg,
            key: agg.key,
            label: agg.label,
            valuesContainer: grouped[agg.key],
            getOutput: function (columnKey) {
                var aggregatedValueForKey = grouped[agg.key][columnKey];
                return agg.agg.getOutput !== undefined ?
                        agg.agg.getOutput(aggregatedValueForKey) :
                        aggregatedValueForKey;
            }
        })
    });
    return crossfilter(groupedAsArr).dimension(function (el) {   return el;    });
};

The small helper method buildColumns creates a column for each original key in srcKeys, plus an additional column for the operation labels:

var buildColumns = function (srcKeys) {
    var columns = [];
    columns.push({
        label: "Aggregate",
        format: function (el) {
            return el.label;
        }
    });
    srcKeys.forEach(function (key) {
        columns.push({
            label: key,
            format: function (el) {
                return el.getOutput(key);
            }
        });
    });
    return columns;
};

So now let's put it all together and create the table.

var facts = crossfilter(myCSV);
var aggregatedDimension = buildTransposedAggregatesDimension(facts, srcKeys, aggregates);
dataTable = dc.dataTable('#dataTable'); // put such a <table> in your HTML!

dataTable
        .width(500)
        .height(400)
        .dimension(aggregatedDimension)
        .group(function (d) { return ''; })
        .columns(buildColumns(srcKeys))
        .sortBy(function (el) { return el.sortIndex; })
        .order(d3.ascending);

//dataTable.render();
dc.renderAll();

There is also an additional piece of code, shamelessly stolen from Gordon, that adds a line chart for additional filtering.

Zito answered 3/3, 2017 at 2:52 Comment(9)
Main thing I'd suggest here is that you still need to implement .bottom() (yeah, unfortunately), because this will only calculate the aggregates and transposition once, and it needs to react to filtering in other charts. That's really the essential idea behind fake groups and dimensions - wait until the results are needed, then do the calculation, and calculate it anew each time requested.Pizzicato
@Pizzicato I believe that your solution that implements bottom doesn't react to filtering as well because there is no way it can be bound to the original dataset internals that track changes. If you think I'm mistaken, could you provide me an example where it works so I can remove false claims from my answer?Zito
My fiddle has another chart in it to test the filtering. The fake dimension is bound to the original crossfilter simply by calling filteredRows.value()Pizzicato
@Gordon, I had wrong idea about how dc works. And actually it appears that my solution works with filtering perfectly well (if you use safe operations only) without any need for custom bottom. See my updated jsfiddle.net/4kcu2ut1/1Zito
Interesting - looks like your getOutput functions serve a similar purpose - the groupAll is recalculated automatically when the filter changes, and getOutput rereads it. So you set up sort of a virtual crossfilter in buildTransposedAggregatesDimension which is not calculating anything, just provides rows of accessors. Clever!Pizzicato
Sorry for the delay, I've been trying to integrate both solutions and will continue to work on yours, but in all honesty I found Gordon's to be straightforward to integrate, and as of yet I was unable to get your answer working with my code.Perspiratory
I am deeply grateful for your example, and I will be studying all your other answers over the weeks to come.Perspiratory
@crashwap, could you tell where you see problems with integration of my solution? I'm not sure how I can help you without additional details on what bothers you. And of course you still can use Gordon's solution anyway.Zito
@crashwap, It is not bounty that I'm after (at least now ☺). Gordon is definitely much more professional user of dc.js/d3.js and fully deserves his bounty. I'm a bit disappointed that my clever trick (that does produce the result you asked for) appeared to be too clever to be useful. Still, If you have questions regarding it, feel free to ask and I'll try to clarify it.Zito
