I did some benchmarks to checkout which way is quicker, I got the following results (in seconds):
12
30
3
6
23
3
The first line is doing iteration as suggested by @jleahy.
The second line is doing iteration as I've done in my code in the question (the inverse order of @jleahy).
The third line is doing iteration using PlainObjectBase::data()
like this for (int i = 0; i < matrixObject.size(); i++)
.
The other 3 lines repeat the same as the above, but with an temporary as suggest by @lucas92
I also did the same tests but using substituting /if else.*/ for /else/ (no special treatment for sparse matrix) and I got the following (in seconds):
10
27
3
6
24
2
Doing the tests again gave me results pretty similar. I used g++ 4.7.3
with -O3
. The code:
#include <ctime>
#include <iostream>
#include <Eigen/Dense>
using namespace std;
template <typename T, int R, int C>
inline T sum_kahan1(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
if (xs(j,i)>0)
{
yP = xs(j,i) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (xs(j,i)<0)
{
yN = xs(j,i) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
if (xs(i,j)>0)
{
yP = xs(i,j) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (xs(i,j)<0)
{
yN = xs(i,j) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
if ((*(xs.data() + i))>0)
{
yP = (*(xs.data() + i)) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if ((*(xs.data() + i))<0)
{
yN = (*(xs.data() + i)) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan1t(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
T temporary = xs(j,i);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (temporary<0)
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2t(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
T temporary = xs(i,j);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (temporary<0)
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3t(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
T temporary = (*(xs.data() + i));
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else if (temporary<0)
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan1e(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
if (xs(j,i)>0)
{
yP = xs(j,i) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = xs(j,i) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2e(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
if (xs(i,j)>0)
{
yP = xs(i,j) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = xs(i,j) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3e(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
if ((*(xs.data() + i))>0)
{
yP = (*(xs.data() + i)) - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = (*(xs.data() + i)) - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan1te(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
for (size_t j = 0; j < nRows; ++j)
{
T temporary = xs(j,i);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan2te(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
for (size_t j = 0; j < nCols; ++j)
{
T temporary = xs(i,j);
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
template <typename T, int R, int C>
inline T sum_kahan3te(const Eigen::Matrix<T,R,C>& xs) {
if (xs.size() == 0) return 0;
T sumP(0);
T sumN(0);
T tP(0);
T tN(0);
T cP(0);
T cN(0);
T yP(0);
T yN(0);
for (size_t i = 0, size = xs.size(); i < size; i++)
{
T temporary = (*(xs.data() + i));
if (temporary>0)
{
yP = temporary - cP;
tP = sumP + yP;
cP = (tP - sumP) - yP;
sumP = tP;
}
else
{
yN = temporary - cN;
tN = sumN + yN;
cN = (tN - sumN) - yN;
sumN = tN;
}
}
return sumP+sumN;
}
int main() {
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> test = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>::Random(10000,10000);
cout << "start" << endl;
int now;
now = time(0);
sum_kahan1(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan1t(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2t(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3t(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan1e(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2e(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3e(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan1te(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan2te(test);
cout << time(0) - now << endl;
now = time(0);
sum_kahan3te(test);
cout << time(0) - now << endl;
return 0;
}
PlainObjectBase::data()
like thisfor (int i = 0; i < Acolmajor.size(); i++)
, I didn't new this function existed, maybe it's as fast as simple loops and I don't have to worry if it's a column and row major order matrix. I'll do some benchmarks to check this. Thanks for the answer and link! – Perchloride