I decided to use Eigen library in my project. But it is not clear from documentation how the most efficiently one should specify an array of 3d vectors.
As I suggest, the first way is
Eigen::Matrix<Eigen::Vector3d, Eigen::Dynamic, 1> array_of_v3d(size);
But in that case how should I get another array which elements are equal to scalar products of elements of array_of_v3d
and some other instance of Vector3d
?
In other words, can I rewrite the following loop using Eigen
's functions:
Eigen::Vector3d v3d(0.5, 0.5, 0.5);
Eigen::VectorXd other_array(size);
for (size_t i = 0; i < size; ++i)
other_array(i) = array_of_v3d(i).dot(v3d);
The second way is to use a matrix which size is (3 x size)
or (size x 3)
.
For example, I can declare it like this:
Eigen::Matrix<double, 3, Eigen::Dynamic> matrix;
But I didn't get from the documentation how to set the number of columns.
The following seems to work but I have to repeat the number of rows 3
twice:
Eigen::Matrix<double, 3, Eigen::Dynamic> matrix(3, size);
Then the above loop is equivalent to
other_array = v3d.transpose() * array_of_v3d;
As my experiments shown that this is a little bit faster than
Eigen::Matrix<double, Eigen::Dynamic, 3> matrix(size, 3);
other_array = array_of_v3d * v3d;
In addition:
Anyway, my use of Eigen
seems to be not so optimal since the same program in plain C
is almost 1.5 times faster (it is not true indeed, it depends on size
):
for (size_t i = 0; i < size; i+=4) {
s[i] += a[i] * x + b[i] * y + c[i] * z;
s[i+1] += a[i+1] * x + b[i+1] * y + c[i+1] * z;
s[i+2] += a[i+2] * x + b[i+2] * y + c[i+2] * z;
s[i+3] += a[i+3] * x + b[i+3] * y + c[i+3] * z;
}
Have I missed something? Is there some other way in the scope of Eigen
library to solve my problem?
Update:
Here I present results of my testings. There are 5 cases:
C
-style for loop which is partially unrolledEigen::Matrix
(rows x cols = 3 x size
). In this case values of 3d vectors are stored together in memory because by defaultEigen
stores data in column-major. Or I could setEigen::RowMajor
and everything else like in next case.Eigen::Matrix
(rows x cols = size x 3
).- Here each component of 3d vector is stored on a separate
VectorXd
. So there are three VectorXd objects which are put together onVector3d
. - An
std::vector
container is used to storeVector3d
objects.
This is my test program
#include <iostream>
#include <iomanip>
#include <vector>
#include <ctime>
#include <Eigen/Dense>
double for_loop(size_t rep, size_t size)
{
std::vector<double> a(size), b(size), c(size);
double x = 1, y = 2, z = - 3;
std::vector<double> s(size);
for(size_t i = 0; i < size; ++i) {
a[i] = i;
b[i] = i;
c[i] = i;
s[i] = 0;
}
double dtime = clock();
for(size_t j = 0; j < rep; j++)
for(size_t i = 0; i < size; i += 8) {
s[i] += a[i] * x + b[i] * y + c[i] * z;
s[i] += a[i+1] * x + b[i+1] * y + c[i+1] * z;
s[i] += a[i+2] * x + b[i+2] * y + c[i+2] * z;
s[i] += a[i+3] * x + b[i+3] * y + c[i+3] * z;
s[i] += a[i+4] * x + b[i+4] * y + c[i+4] * z;
s[i] += a[i+5] * x + b[i+5] * y + c[i+5] * z;
s[i] += a[i+6] * x + b[i+6] * y + c[i+6] * z;
s[i] += a[i+7] * x + b[i+7] * y + c[i+7] * z;
}
dtime = (clock() - dtime) / CLOCKS_PER_SEC;
double res = 0;
for(size_t i = 0; i < size; ++i)
res += std::abs(s[i]);
assert(res == 0.);
return dtime;
}
double eigen_3_size(size_t rep, size_t size)
{
Eigen::Matrix<double, 3, Eigen::Dynamic> A(3, size);
Eigen::Matrix<double, 1, Eigen::Dynamic> S(size);
Eigen::Vector3d X(1, 2, -3);
for(size_t i = 0; i < size; ++i) {
A(0, i) = i;
A(1, i) = i;
A(2, i) = i;
S(i) = 0;
}
double dtime = clock();
for (size_t j = 0; j < rep; j++)
S.noalias() += X.transpose() * A;
dtime = (clock() - dtime) / CLOCKS_PER_SEC;
double res = S.array().abs().sum();
assert(res == 0.);
return dtime;
}
double eigen_size_3(size_t rep, size_t size)
{
Eigen::Matrix<double, Eigen::Dynamic, 3> A(size, 3);
Eigen::Matrix<double, Eigen::Dynamic, 1> S(size);
Eigen::Vector3d X(1, 2, -3);
for(size_t i = 0; i < size; ++i) {
A(i, 0) = i;
A(i, 1) = i;
A(i, 2) = i;
S(i) = 0;
}
double dtime = clock();
for (size_t j = 0; j < rep; j++)
S.noalias() += A * X;
dtime = (clock() - dtime) / CLOCKS_PER_SEC;
double res = S.array().abs().sum();
assert(res == 0.);
return dtime;
}
double eigen_vector3_vector(size_t rep, size_t size)
{
Eigen::Matrix<Eigen::VectorXd, 3, 1> A;
A(0).resize(size);
A(1).resize(size);
A(2).resize(size);
Eigen::VectorXd S(size);
Eigen::Vector3d X(1, 2, -3);
for(size_t i = 0; i < size; ++i) {
A(0)(i) = i;
A(1)(i) = i;
A(2)(i) = i;
S(i) = 0;
}
double dtime = clock();
for (size_t j = 0; j < rep; j++)
S.noalias() += A(0) * X(0) + A(1) * X(1) + A(2) * X(2);
dtime = (clock() - dtime) / CLOCKS_PER_SEC;
double res = S.array().abs().sum();
assert(res == 0.);
return dtime;
}
double eigen_stlvector_vector3(size_t rep, size_t size)
{
std::vector< Eigen::Vector3d,
Eigen::aligned_allocator<Eigen::Vector3d> > A(size);
std::vector<double> S(size);
Eigen::Vector3d X(1, 2, -3);
for(size_t i = 0; i < size; ++i) {
A[i](0) = i;
A[i](1) = i;
A[i](2) = i;
S[i] = 0;
}
double dtime = clock();
for (size_t j = 0; j < rep; j++)
for(size_t i = 0; i < size; i++)
S[i] += X.dot(A[i]);
dtime = (clock() - dtime) / CLOCKS_PER_SEC;
double res = 0;
for(size_t i = 0; i < size; i++)
res += std::abs(S[i]);
assert(res == 0.);
return dtime;
}
int main()
{
std::cout << " size | for loop | Matrix | Matrix | Vector3 of | STL vector of \n"
<< " | | 3 x size | size x 3 | Vector_size | TinyVectors \n"
<< std::endl;
size_t n = 10;
size_t sizes[] = {16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192};
int rep_all = 1024 * 1024 * 1024;
for (int i = 0; i < n; ++i) {
size_t size = sizes[i];
size_t rep = rep_all / size;
double t1 = for_loop (rep, size);
double t2 = eigen_3_size (rep, size);
double t3 = eigen_size_3 (rep, size);
double t4 = eigen_vector3_vector (rep, size);
double t5 = eigen_stlvector_vector3 (rep, size);
using namespace std;
cout << setw(8) << size
<< setw(13) << t1 << setw(13) << t2 << setw(13) << t3
<< setw(14) << t4 << setw(15) << t5 << endl;
}
}
The program was compiled by gcc 4.6
with options -march=native -O2 -msse2 -mfpmath=sse
. On my Athlon 64 X2 4600+
I got a nice table:
size | for loop | Matrix | Matrix | Vector3 of | STL vector of
| | 3 x size | size x 3 | Vector_size | TinyVectors
16 2.23 3.1 3.29 1.95 3.34
32 2.12 2.72 3.51 2.25 2.95
64 2.15 2.52 3.27 2.03 2.74
128 2.22 2.43 3.14 1.92 2.66
256 2.19 2.38 3.34 2.15 2.61
512 2.17 2.36 3.54 2.28 2.59
1024 2.16 2.35 3.52 2.28 2.58
2048 2.16 2.36 3.43 2.42 2.59
4096 11.57 5.35 20.29 13.88 5.23
8192 11.55 5.31 16.17 13.79 5.24
The table shows that good representations of an array of 3d vectors are Matrix
(components of 3d vectors should stored together) and std::vector
of fixed size Vector3d
objects. This confirms Jakob's answer. For big vectors Eigen
indeed shows good results.