I am trying to figure out why this program (code below) runs faster using 2 threads instead of 4 on an Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz, while on an Intel(R) Core(TM) i7-4510U CPU @ 2.00GHz the 4-thread version runs faster than the 2-thread version of the same program.
# include <stdlib.h>
# include <stdio.h>
# include <math.h>
# include <time.h>
# include <omp.h>

/* Order of the square matrices a, b and c. */
enum { MATRIX_ORDER = 500 };

int main ( void );
void timestamp ( void );

/*
 * Times the dense matrix product c = a * b with OpenMP.
 *
 * Loop 1 fills a, loop 2 copies a into b, loop 3 forms the product; all
 * three loops are work-shared inside a single parallel region and timed
 * together with omp_get_wtime().
 *
 * Returns 0 on normal termination.
 */
int main ( void )
{
  /*
   * Three 500x500 double matrices are about 6 MB in total; static storage
   * keeps them off the stack, which may be too small to hold them.
   */
  static double a[MATRIX_ORDER][MATRIX_ORDER];
  static double b[MATRIX_ORDER][MATRIX_ORDER];
  static double c[MATRIX_ORDER][MATRIX_ORDER];
  int n = MATRIX_ORDER;
  /* M_PI is not part of standard C, so the value is written out. */
  double pi = 3.141592653589793;
  double s;
  int thread_num;
  double wtime;

  timestamp ( );

  printf ( "\n" );
  printf ( "mxm_openmp:\n" );
  printf ( " c/openmp version\n" );
  printf ( " compute matrix product c = a * b.\n" );

  omp_set_dynamic ( 0 );
  omp_set_num_threads ( 2 ); /* change param modify number of threads */
  thread_num = omp_get_max_threads ( );

  printf ( "\n" );
  printf ( " number of processors available = %d\n", omp_get_num_procs ( ) );
  printf ( " number of threads available = %d\n", thread_num );
  printf ( " matrix order n = %d\n", n );

  /* loop 1: evaluate a. */
  s = 1.0 / sqrt ( ( double ) ( n ) );

  wtime = omp_get_wtime ( );

  /*
   * Loop indices and temporaries are declared inside the region, so each
   * thread gets its own copy without a private clause.
   */
# pragma omp parallel shared ( a, b, c, n, pi, s )
  {
# pragma omp for
    for ( int i = 0; i < n; i++ )
    {
      for ( int j = 0; j < n; j++ )
      {
        double angle = 2.0 * pi * i * j / ( double ) n;
        a[i][j] = s * ( sin ( angle ) + cos ( angle ) );
      }
    }

    /* loop 2: copy a into b. */
# pragma omp for
    for ( int i = 0; i < n; i++ )
    {
      for ( int j = 0; j < n; j++ )
      {
        b[i][j] = a[i][j];
      }
    }

    /* loop 3: compute c = a * b. */
# pragma omp for
    for ( int i = 0; i < n; i++ )
    {
      for ( int j = 0; j < n; j++ )
      {
        /*
         * Accumulate in a thread-local scalar and store once, instead of
         * reading and writing the shared c[i][j] on every k iteration.
         */
        double sum = 0.0;
        for ( int k = 0; k < n; k++ )
        {
          sum = sum + a[i][k] * b[k][j];
        }
        c[i][j] = sum;
      }
    }

    /* Report the team size once rather than once per thread. */
# pragma omp single
    printf ( " number of threads used = %d\n", omp_get_num_threads ( ) );
  }

  wtime = omp_get_wtime ( ) - wtime;

  printf ( " elapsed seconds = %g\n", wtime );
  printf ( " c(100,100) = %g\n", c[99][99] );

  /* terminate. */
  printf ( "\n" );
  printf ( "mxm_openmp:\n" );
  printf ( " normal end of execution.\n" );
  printf ( "\n" );

  timestamp ( );

  return 0;
}

/*
 * Prints the current local date and time on one line, formatted as
 * "DD Month YYYY HH:MM:SS AM/PM".
 *
 * Prints nothing if the local time cannot be obtained or formatted.
 */
void timestamp ( void )
{
  enum { TIME_SIZE = 40 };

  char time_buffer[TIME_SIZE];
  const struct tm *tm;
  time_t now;

  now = time ( NULL );
  tm = localtime ( &now );
  if ( tm == NULL )
  {
    return;
  }

  /* strftime returns 0 when the result does not fit in the buffer. */
  if ( strftime ( time_buffer, sizeof time_buffer, "%d %B %Y %I:%M:%S %p", tm ) == 0 )
  {
    return;
  }

  printf ( "%s\n", time_buffer );
}
Even if you have two identical machines with the same functionality, the execution time may still differ. Why? Because machine 1 may have 200 services running while machine 2 has 225, so the program on machine 2 gets less time on the processor/resources, which may lead to a longer execution time.
It depends on the running services (both background and foreground).
Comments
Post a Comment