Compare blitz++, armadillo, boost::MultiArray
Asked Answered
F

4

36

I did a comparison between blitz++, armadillo, boost::MultiArray with the following code (borrowed from an old post)

#include <iostream>
using namespace std;
#include <windows.h>
#define _SCL_SECURE_NO_WARNINGS
#define BOOST_DISABLE_ASSERTS 
#include <boost/multi_array.hpp>
#include <blitz/array.h>
#include <armadillo>

int main(int argc, char* argv[])
{
    const int X_SIZE = 1000;
    const int Y_SIZE = 1000;
    const int ITERATIONS = 100;
    unsigned int startTime = 0;
    unsigned int endTime = 0;

    // Create the boost array


    //------------------Measure boost Loop------------------------------------------
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] = 1.0001;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[Boost Loop] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }
    //------------------Measure blitz Loop-------------------------------------------
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    blitzArray(x,y) = 1.0001;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[Blitz Loop] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure armadillo loop----------------------------------------
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                for (int x = 0; x < X_SIZE; ++x)
                {
                    matArray(x,y) = 1.0001;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[arma  Loop]  Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure native loop----------------------------------------
    // Create the native array
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] = 1.0001;
            }
        }
        endTime = ::GetTickCount();
        printf("[Native Loop]Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
        delete[] nativeMatrix;
    }

    //------------------Measure boost computation-----------------------------------
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        for (int x = 0; x < X_SIZE; ++x)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                boostMatrix[x][y] = 1.0001;
            }
        }
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] += boostMatrix[x][y] * 0.5;
                }
            }
        }
        endTime = ::GetTickCount();
        printf("[Boost computation] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure blitz computation-----------------------------------
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        blitzArray = 1.0001;
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            blitzArray += blitzArray*0.5;
        }
        endTime = ::GetTickCount();
        printf("[Blitz computation] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure armadillo computation-------------------------------
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        matArray.fill(1.0001);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            //matArray.fill(1.0001);
            matArray += matArray*0.5;
        }
        endTime = ::GetTickCount();
        printf("[arma  computation] Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
    }

    //------------------Measure native computation------------------------------------------
    // Create the native array
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
        {
            nativeMatrix[y] = 1.0001;
        }
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] += nativeMatrix[y] * 0.5;
            }
        }
        endTime = ::GetTickCount();
        printf("[Native computation]Elapsed time: %6.3f seconds\n", (endTime - startTime) / 1000.0);
        delete[] nativeMatrix;
    }

    return 0;
}

On windows, VS2010, results are

[Boost Loop] Elapsed time:  1.217 seconds
[Blitz Loop] Elapsed time:  0.046 seconds
[arma  Loop]  Elapsed time:  0.078 seconds
[Native Loop]Elapsed time:  0.172 seconds
[Boost computation] Elapsed time:  2.152 seconds
[Blitz computation] Elapsed time:  0.156 seconds
[arma  computation] Elapsed time:  0.078 seconds
[Native computation]Elapsed time:  0.078 seconds

On windows, intel c++, results are

[Boost Loop] Elapsed time:  0.468 seconds
[Blitz Loop] Elapsed time:  0.125 seconds
[arma  Loop]  Elapsed time:  0.046 seconds
[Native Loop]Elapsed time:  0.047 seconds
[Boost computation] Elapsed time:  0.796 seconds
[Blitz computation] Elapsed time:  0.109 seconds
[arma  computation] Elapsed time:  0.078 seconds
[Native computation]Elapsed time:  0.062 seconds

Something strange:

(1) with VS2010, native computation (including loop) is faster than native loop
(2) the blitz loop behaves so differently under VS2010 and Intel C++.

To compile blitz++ with intel c++ compiler, a file called bzconfig.h is required in blitz/intel/ folder. But there isn't. I just copy the one in blitz/ms/bzconfig.h in. That may give an non-optimal configuration. Anyone can tell me how to compile blitz++ with intel c++ compiler? In the manual, it said run bzconfig script to get the right bzconfig.h. But I don't understand what it means.

Thanks a lot!

Adding some of my conclusions:

1. Boost multi array is the slowest.
2. With intel c++ compiler, native pointers are very fast.
3. With intel c++ compiler,  armadillo can achieve the performance of native pointers.
4. Also test eigen, it is x0% slower than armadillo in my simple cases.
5. Curious about blitz++'s behavior in intel c++ compiler with proper configuration.
   Please see my question.
Fibrilla answered 19/1, 2013 at 13:36 Comment(7)
Note that by default Armadillo has bounds checks enabled (as well as other useful sanity checks). The reasoning is to first get your algorithm right, then optimise it. You can get faster speeds from Armadillo by disabling the bounds checks, as documented here and here. Basically it amounts to defining ARMA_NO_DEBUG before including the armadillo header, in a similar manner to BOOST_DISABLE_ASSERTS.Kinghood
Have you tried changing the order, so it reads native, arma, blitz, boost?Nerynesbit
So, your only question's "Anyone can tell me how to compile blitz++ with intel c++ compiler?". If so add detail, and we don't need the rest - it belongs in a blog post, or perhaps on codereview.stackexchange.com if you're soliciting feedback.Quita
I'm voting to close this question as off-topic because it seeks build help, and analogous to the expectations for debugging help, I'd expect a minimal and clear problem statement. As is, the problem's buried in unrelated code and observations.Quita
What was the optimization level? Very often there is a noticeable difference at -O0 and no difference at -O3. Also, your test takes only a couple of seconds, so you might want to increase the number of iterations to get more precise results. I think 10000 iterations is a reasonable number. It would take a couple of minutes, but the outcome will be more reliable.Mcilroy
Did you replace the default BLAS and LAPACK libs for arma by defining ARMA_USE_LAPACK and ARMA_USE_BLAS in config.hpp? With OpenBLAS you get multithreaded performance. The test of using single assigns to the elements is IMHO not a good one, as it's a very inefficient way to fill a matrix, and the libraries will not be optimized for that.Tsai
Many of these posts, including the one you refer to, suffer from not comparing like with like. The latest reply to your cited post #447366 shows that for a comparison optimised towards boost, multi_array can be made to perform on a par with a native array implementation.Essequibo
L
5

Short answer: ./configure CXX=icpc, found by reading the Blitz++ User's Guide.

Long answer:

To compile blitz++ with intel c++ compiler, a file called bzconfig.h is required in blitz/intel/ folder. But there isn't.

Yes and yes. Blitz++ is supposed to generate the file itself. According to the Blitz++ User's Guide blitz.pdf included in blitz-0.10.tar.gz, section "Installation",

Blitz++ uses GNU Autoconf, which handles rewriting Makefiles for various platforms and compilers.

More accurately, Blitz++ uses the GNU autotools tool chain (automake, autoconf, configure), which can generate makefiles, configure scripts, header files and more. The bzconfig.h files are supposed to be generated by the configure script, which comes with Blitz++, ready to use.

I just copy the one in blitz/ms/bzconfig.h in. That may give an non-optimal configuration.

If "non-optimal" means "non-working" to you, then yes. :-) You need an intel/bzconfig.h that accurately represents your compiler.

Anyone can tell me how to compile blitz++ with intel c++ compiler?

Read and follow the fine manual, in particular the section "Installation" mentioned above.

go into the ‘blitz-VERSION’ directory, and type: ./configure CXX=[compiler] where [compiler] is one of xlc++, icpc, pathCC, xlC, cxx, aCC, CC, g++, KCC, pgCC or FCC. (If you do not choose a C++ compiler, the configure script will attempt to find an appropriate compiler for the current platform.)

Have you done this? For the Intel compiler, you would need to use ./configure CXX=icpc.

In the manual, it said run bzconfig script to get the right bzconfig.h. But I don't understand what it means.

I assume that by "it" you mean "that". What do you mean by "manual"? My copy of the Blitz++ User's Guide does not mention bzconfig. Are you sure that you are using the manual that corresponds to your Blitz++ version?

PS: Looking for "bzconfig" in the contents of blitz-0.10, it looks like "bzconfig" is no longer part of Blitz++, but used to be:

find . -name bzconfig -> No results

find . -print0 | xargs -0 grep -a -i -n -e bzconfig:

./blitz/compiler.h:44:    #error  In <blitz/config.h>: A working template implementation is required by Blitz++ (you may need to rerun the compiler/bzconfig script)

That needs to be updated.

./blitz/gnu/bzconfig.h:4:/* blitz/gnu/bzconfig.h. Generated automatically at end of configure. */
./configure.ac:159:# autoconf replacement of bzconfig

There you have it, these bzconfig.h files should be generated by configure.

./ChangeLog.1:1787: will now replace the old file that was generate with the bzconfig

That may be the change that switched to autoconf.

./INSTALL:107:  2. Go into the compiler subdirectory and run the bzconfig

That needs to be updated. Is this what made you look for bzconfig?

./README:27:compiler      Compiler tests (used with obsolete bzconfig script)  

Needs updating, a compiler directory is no longer included.

Latish answered 22/10, 2015 at 12:23 Comment(0)
I
5

As far as I can tell, you are judging the performance of each matrix library by measuring the speed of multiplying a single matrix by a scalar. Due to its template-based policy, Armadillo will do a very good job at this by breaking down each multiply into parallelizable code for most compilers.

But I suggest you need to rethink your test scope and methodology. For example, you've left out every BLAS implementation. The BLAS function you'd need would be dscal. A vendor-provided implementation for your specific CPU would probably do a good job.

More relevantly, there are many more things any reasonable vector library would need to be able to do: matrix multiplies, dot products, vector lengths, transposes, and so forth, which aren't addressed by your test. Your test addresses exactly two things: element assignment, which practically speaking is never a bottleneck for vector libraries, and scalar/vector multiplication, which is a BLAS level 1 function provided by every CPU manufacturer.

There is a discussion of BLAS level 1 vs. compiler-emitted code here.

tl;dr: use Armadillo with BLAS and LAPACK native libraries linked in for your platform.

Ivatts answered 22/12, 2015 at 21:45 Comment(1)
Use of language such as "Grown-ups discuss..." is rather condescending and unprofessional in appearance.Microsecond
N
2

My test showed boost arrays had the same performance as the native/hardcoded C++ code.

You need to compare them using compiler optimisations activated. That is: -O3 -DNDEBUG -DBOOST_UBLAS_NDEBUG -DBOOST_DISABLE_ASSERTS -DARMA_NO_DEBUG ... When I tested (em++), Boost performed at least 10X faster when you deactivate its asserts, enable level 3 optimisation using -O3, etc. Any fair comparison should use these flags.

Nth answered 8/6, 2016 at 9:51 Comment(0)
F
0

Out of curiosity, I am revisiting this speed test.

I added tests for the Eigen library and my own library, MULTI (https://gitlab.com/correaa/boost-multi)

The full code, adapted to Linux, is below at the end of the post.

Having implemented a multidimensional array library, it is important to consider that most of these libraries come with bounds checking that can be optionally disabled.

Also, it is important to note that timing measurements require, at least, that we prevent the compiler from optimizing away code without side effects. For this reason, I added a doNotOptimizeAway function that prevents this. (See its code at the end.) It is also important to know where to call this function from (inside the repetition loop, I believe).

These are the results:

Without optimization (this is not a good test; it just shows how bad it can be to run these measurements in debug mode)

$ g++ a.cpp && ./a.out
[Boost Loop] Elapsed time:  7.216 seconds
[Blitz Loop] Elapsed time:  1.151 seconds
[arma  Loop]  Elapsed time:  0.747 seconds
[Native Loop] Elapsed time:  0.319 seconds
[EIGEN Loop] Elapsed time:  9.022 seconds
[MULTI Loop] Elapsed time:  7.769 seconds

[Boost computation] Elapsed time: 15.456 seconds
[Blitz computation] Elapsed time:  4.441 seconds
[arma  computation] Elapsed time:  0.662 seconds
[Native computation] Elapsed time:  0.340 seconds
[EIGEN computation] Elapsed time: 18.714 seconds
[MULTI computation] Elapsed time: 15.434 seconds

With optimization:

$ sudo cpupower frequency-set --governor performance
$ g++ -O3 -DNDEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.024 seconds
[Blitz Loop] Elapsed time:  0.027 seconds
[arma  Loop]  Elapsed time:  0.052 seconds
[Native Loop] Elapsed time:  0.023 seconds
[EIGEN Loop] Elapsed time:  0.138 seconds
[MULTI Loop] Elapsed time:  0.031 seconds

[Boost computation] Elapsed time:  0.059 seconds
[Blitz computation] Elapsed time:  0.054 seconds
[arma  computation] Elapsed time:  0.061 seconds
[Native computation] Elapsed time:  0.058 seconds
[EIGEN computation] Elapsed time:  0.196 seconds
[MULTI computation] Elapsed time:  0.060 seconds

With fast math:

$ g++ -O3 -ffast-math -DNDEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.024 seconds
[Blitz Loop] Elapsed time:  0.028 seconds
[arma  Loop]  Elapsed time:  0.051 seconds
[Native Loop] Elapsed time:  0.024 seconds
[EIGEN Loop] Elapsed time:  0.139 seconds
[MULTI Loop] Elapsed time:  0.027 seconds

[Boost computation] Elapsed time:  0.040 seconds
[Blitz computation] Elapsed time:  0.033 seconds
[arma  computation] Elapsed time:  0.043 seconds
[Native computation] Elapsed time:  0.039 seconds
[EIGEN computation] Elapsed time:  0.148 seconds
[MULTI computation] Elapsed time:  0.041 seconds

With macros to disable bound checks mentioned around here:

$ sudo cpupower frequency-set --governor performance
$ g++ -march=native -mtune=native -Ofast -DNDEBUG -DARMA_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DARMA_USE_LAPACK -DARMA_USE_BLAS -DBOOST_UBLAS_NDEBUG -DARMA_NO_DEBUG -DEIGEN_NO_DEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.024 seconds
[Blitz Loop] Elapsed time:  0.024 seconds
[arma  Loop]  Elapsed time:  0.022 seconds
[Native Loop] Elapsed time:  0.022 seconds
[EIGEN Loop] Elapsed time:  0.131 seconds
[MULTI Loop] Elapsed time:  0.024 seconds

[Boost computation] Elapsed time:  0.025 seconds
[Blitz computation] Elapsed time:  0.023 seconds
[arma  computation] Elapsed time:  0.045 seconds
[Native computation] Elapsed time:  0.024 seconds
[EIGEN computation] Elapsed time:  0.132 seconds
[MULTI computation] Elapsed time:  0.024 seconds

With clang the results are a bit puzzling (my library and Blitz do worst!)

$ clang++ -std=c++17 -march=native -mtune=native -Ofast -DNDEBUG -DARMA_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DARMA_USE_LAPACK -DARMA_USE_BLAS -DBOOST_UBLAS_NDEBUG -DARMA_NO_DEBUG -DEIGEN_NO_DEBUG a.cpp && sudo nice -n -10 ./a.out
[Boost Loop] Elapsed time:  0.025 seconds
[Blitz Loop] Elapsed time:  0.024 seconds
[arma  Loop]  Elapsed time:  0.023 seconds
[Native Loop] Elapsed time:  0.023 seconds
[EIGEN Loop] Elapsed time:  0.132 seconds
[MULTI Loop] Elapsed time:  0.069 seconds

[Boost computation] Elapsed time:  0.024 seconds
[Blitz computation] Elapsed time:  0.055 seconds
[arma  computation] Elapsed time:  0.023 seconds
[Native computation] Elapsed time:  0.023 seconds
[EIGEN computation] Elapsed time:  0.124 seconds
[MULTI computation] Elapsed time:  0.087 seconds

So, there you go. It seems that these days, the most important factor is to know how to compile with optimization, disable debugging, choose the compiler, and know how to write timing tests rather than the library chosen (at least for these simple use patterns).

Eigen is still an outlier here; please let me know if somebody knows the correct options to compile with Eigen.

More details of my Ubuntu 23.04 machine:

g++ (Ubuntu 12.3.0-1ubuntu1~23.04) 12.3.0
clang version 15.0.7
libboost1.81-all-dev/lunar 1.81.0-4build2 amd64
libblitz0-dev/lunar,now 1:1.0.2+ds-4 amd64 [installed]
libarmadillo-dev/lunar,now 1:11.4.2+dfsg-1 amd64 [installed]
libeigen3-dev/lunar,now 3.4.0-4 all [installed]
MULTI v0.80.1 from https://gitlab.com/correaa/boost-multi

CPU: Intel® Core™ i7-9750H × 12


Full code:

#include <iostream>
using namespace std;
#define _SCL_SECURE_NO_WARNINGS
#define BOOST_DISABLE_ASSERTS
#include <boost/multi_array.hpp>
#include <blitz/array.h>
#include <armadillo>
#include <eigen3/Eigen/Dense>
#include "/home/correaa/boost-multi/include/multi/array.hpp"

#include <chrono>
// Stand-in for the Win32 call of the same name so the benchmark body can stay
// unchanged: returns the current reading of the monotonic steady clock.
std::chrono::steady_clock::time_point GetTickCount()
{
    return std::chrono::steady_clock::now();
}

// Optimisation barrier for benchmarks: makes the compiler treat `t` as used
// and all memory as potentially read, without emitting any instructions.
//   t - the value (or object) whose computation must be kept alive
// The "memory" clobber matters when `t` is a raw pointer: the operand alone
// only observes the pointer value, so stores made through it could otherwise
// still be discarded as dead. Relies on GCC/Clang extended asm.
template <class T>
void doNotOptimizeAway(T&& t) {
    __asm__ __volatile__ ("" :: "g" (t) : "memory");
}

int main(int argc, char* argv[])
{
    const int X_SIZE = 1000;
    const int Y_SIZE = 1000;
    double factor = 10;  // had to do more iteration to get more steady results, the timings are normalized still
    const int ITERATIONS = 100*10;
    auto startTime = ::GetTickCount();
    auto endTime = ::GetTickCount();

    // Create the boost array


    //------------------Measure boost Loop------------------------------------------
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] = 1.0001;
                }
            }
            doNotOptimizeAway(boostMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Boost Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }
    //------------------Measure blitz Loop-------------------------------------------
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    blitzArray(x,y) = 1.0001;
                }
            }
            doNotOptimizeAway(blitzArray);
        }
        endTime = ::GetTickCount();
        printf("[Blitz Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure armadillo loop----------------------------------------
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                for (int x = 0; x < X_SIZE; ++x)
                {
                    matArray(x,y) = 1.0001;
                }
            }
            doNotOptimizeAway(matArray);
        }
        endTime = ::GetTickCount();
        printf("[arma  Loop]  Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure native loop----------------------------------------
    // Create the native array
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] = 1.0001;
            }
            doNotOptimizeAway(nativeMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Native Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
        delete[] nativeMatrix;
    }
    //------------------Measure EIGEN Loop------------------------------------------
    {
        typedef Eigen::MatrixXd ImageArrayType;
        ImageArrayType eigenMatrix(X_SIZE, Y_SIZE);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    eigenMatrix(x, y) = 1.0001;
                }
            }
            doNotOptimizeAway(eigenMatrix);
        }
        endTime = ::GetTickCount();
        printf("[EIGEN Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }
    //------------------Measure Multi Loop------------------------------------------
    {
        typedef boost::multi::array<double, 2> ImageArrayType;
        ImageArrayType multiMatrix({X_SIZE, Y_SIZE});
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    multiMatrix[x][y] = 1.0001;
                }
            }
            doNotOptimizeAway(multiMatrix);
        }
        endTime = ::GetTickCount();
        printf("[MULTI Loop] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }


    //------------------Measure boost computation-----------------------------------
    {
        typedef boost::multi_array<double, 2> ImageArrayType;
        ImageArrayType boostMatrix(boost::extents[X_SIZE][Y_SIZE]);
        for (int x = 0; x < X_SIZE; ++x)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                boostMatrix[x][y] = 1.0001;
            }
        }
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    boostMatrix[x][y] += boostMatrix[x][y] * 0.5;
                }
            }
            doNotOptimizeAway(boostMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Boost computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure blitz computation-----------------------------------
    {
        blitz::Array<double, 2> blitzArray( X_SIZE, Y_SIZE );
        blitzArray = 1.0001;
        doNotOptimizeAway(blitzArray);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            blitzArray += blitzArray*0.5;
            doNotOptimizeAway(blitzArray);
        }
        endTime = ::GetTickCount();
        printf("[Blitz computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure armadillo computation-------------------------------
    {
        arma::mat matArray( X_SIZE, Y_SIZE );
        matArray.fill(1.0001);
        doNotOptimizeAway(matArray);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            //matArray.fill(1.0001);
            matArray += matArray*0.5;
            doNotOptimizeAway(matArray);
        }
        endTime = ::GetTickCount();
        printf("[arma  computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    //------------------Measure native computation------------------------------------------
    // Create the native array
    {
        double *nativeMatrix = new double [X_SIZE * Y_SIZE];
        for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
        {
            nativeMatrix[y] = 1.0001;
        }
        doNotOptimizeAway(nativeMatrix);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int y = 0; y < Y_SIZE*X_SIZE; ++y)
            {
                nativeMatrix[y] += nativeMatrix[y] * 0.5;
            }
            doNotOptimizeAway(nativeMatrix);
        }
        endTime = ::GetTickCount();
        printf("[Native computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
        delete[] nativeMatrix;
    }
    //------------------Measure EIGEN computation-----------------------------------
    {
        typedef Eigen::MatrixXd ImageArrayType;
        ImageArrayType eigenMatrix(X_SIZE, Y_SIZE);
        for (int x = 0; x < X_SIZE; ++x)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                eigenMatrix(x, y) = 1.0001;
            }
        }
        doNotOptimizeAway(eigenMatrix);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    eigenMatrix(x, y) += eigenMatrix(x, y) * 0.5;
                }
            }
            doNotOptimizeAway(eigenMatrix);
        }
        endTime = ::GetTickCount();
        printf("[EIGEN computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }
    //------------------Measure MULTI computation-----------------------------------
    {
        typedef boost::multi::array<double, 2> ImageArrayType;
        ImageArrayType multiMatrix({X_SIZE, Y_SIZE});
        for (int x = 0; x < X_SIZE; ++x)
        {
            for (int y = 0; y < Y_SIZE; ++y)
            {
                multiMatrix[x][y] = 1.0001;
            }
        }
        doNotOptimizeAway(multiMatrix);
        startTime = ::GetTickCount();
        for (int i = 0; i < ITERATIONS; ++i)
        {
            for (int x = 0; x < X_SIZE; ++x)
            {
                for (int y = 0; y < Y_SIZE; ++y)
                {
                    multiMatrix[x][y] += multiMatrix[x][y] * 0.5;
                }
            }
            doNotOptimizeAway(multiMatrix);
        }
        endTime = ::GetTickCount();
        printf("[MULTI computation] Elapsed time: %6.3f seconds\n", chrono::duration_cast<chrono::nanoseconds>(endTime - startTime).count() / 1000000000.0 / factor);
    }

    return 0;
}
Frequency answered 31/8, 2023 at 8:35 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.