#include #include #include // Compute C=AB using namespace std; typedef float real_type; void print_time(const char* st, const boost::timer& timer, real_type* c, unsigned N) { double time= timer.elapsed(), mflops= 2*N*N*N / time / 1e6; cout << st << time <<"s, " << mflops << " MFLOPS\n"; if (fabs(c[3*N+7] - 6.0 * N) > 0.01) cout << "wrong result\n"; } int main(int argc, char** argv) { unsigned M= 512, N= M, L= M; if (argc > 1) { M= N= L= atoi(argv[1]); } real_type *a= new real_type[M*N], *b= new real_type[N*L], *c= new real_type[M*L]; fill(&a[0], &a[M*N], 2.0); fill(&b[0], &b[N*L], 3.0); boost::timer t1; fill(&c[0], &c[M*L], 0.0); for (unsigned i= 0; i < M; ++i) for (unsigned j= 0; j < N; ++j) for (unsigned k= 0; k < L; ++k) c[i*L+k]+= a[i*N+j] * b[j*L+k]; print_time("i, j, k = ", t1, c, N); boost::timer t2; fill(&c[0], &c[M*L], 0.0); for (unsigned i= 0; i < M; ++i) for (unsigned k= 0; k < L; ++k) for (unsigned j= 0; j < N; ++j) c[i*L+k]+= a[i*N+j] * b[j*L+k]; print_time("i, k, j = ", t2, c, N); boost::timer t3; fill(&c[0], &c[M*L], 0.0); for (unsigned j= 0; j < N; ++j) for (unsigned i= 0; i < M; ++i) for (unsigned k= 0; k < L; ++k) c[i*L+k]+= a[i*N+j] * b[j*L+k]; print_time("j, i, k = ", t3, c, N); boost::timer t4; fill(&c[0], &c[M*L], 0.0); for (unsigned j= 0; j < N; ++j) for (unsigned k= 0; k < L; ++k) for (unsigned i= 0; i < M; ++i) c[i*L+k]+= a[i*N+j] * b[j*L+k]; print_time("j, k, i = ", t4, c, N); boost::timer t5; fill(&c[0], &c[M*L], 0.0); for (unsigned k= 0; k < L; ++k) for (unsigned i= 0; i < M; ++i) for (unsigned j= 0; j < N; ++j) c[i*L+k]+= a[i*N+j] * b[j*L+k]; print_time("k, i, j = ", t5, c, N); boost::timer t6; fill(&c[0], &c[M*L], 0.0); for (unsigned k= 0; k < L; ++k) for (unsigned j= 0; j < N; ++j) for (unsigned i= 0; i < M; ++i) c[i*L+k]+= a[i*N+j] * b[j*L+k]; print_time("k, j, i = ", t6, c, N); return 0; }