25 cout <<
"Perform an array multiplication: a[1...n] = b[1...n] x c[1...n]" 28 cout <<
"Usage: " << prg <<
"[-asz N] [-modb N] [-modc N] [-moda N] [-nt N]" 46 unsigned* buffer = (
unsigned*) malloc(sz/4);
47 for (
unsigned i=0; i<sz/4; i++)
68 void process_arr(
double* ara,
double* arb,
double* arc,
unsigned sz,
unsigned nthreads,
void* (*fun)(
void*))
73 pthread_t *allthreads =
new pthread_t[
nthreads];
79 #define MIN_T(a,b) (a)<(b)?(a):(b) 81 for (
unsigned i=0; i<
nthreads; i++, start+=chunk_sz) {
82 thread_args[i].
ara = &(ara[start]);
83 thread_args[i].
arb = &(arb[start]);
84 thread_args[i].
arc = &(arc[start]);
85 thread_args[i].
sz =
MIN_T(chunk_sz,sz-start);
90 pthread_create(&allthreads[i], NULL, fun, &(thread_args[i]));
95 pthread_join(allthreads[i], NULL);
111 T* array = args->
array;
112 unsigned sub_sz = args->
sub_sz;
114 for (
unsigned i=0; i<sub_sz; i++)
115 array[i] = args->
map_func(array[i]);
120 void thread_map1(T* array, T (*map1)(T),
unsigned sz,
unsigned nthreads)
125 pthread_t *allthreads =
new pthread_t[
nthreads];
131 for (
unsigned i=0; i<
nthreads; i++, start+=chunk_sz) {
132 thread_args[i].
array = &(array[start]);
138 for(
unsigned i=0; i<
nthreads; i++) {
139 pthread_create(&allthreads[i], NULL, thread_map1_worker<T>, &(thread_args[i]));
143 for(
unsigned i=0; i<
nthreads; i++) {
144 pthread_join(allthreads[i], NULL);
152 for (
unsigned i=0; i<sz; i++,j--)
153 ara[i] = arb[i] * arc[j];
161 unsigned sz = args->
sz;
162 double* ara = args->
ara;
163 double* arb = args->
arb;
164 double* arc = args->
arc;
165 for (
unsigned i=0; i<sz; i++)
166 ara[i] = arb[i] * arc[i];
174 unsigned sz = args->
sz;
175 double* ara = args->
ara;
177 for (
unsigned i=0; i<sz; i++)
178 ara[i] = ara[i] * 1.9752;
186 unsigned sz = args->
sz;
187 double* ara = args->
ara;
188 double* arb = args->
arb;
189 double* arc = args->
arc;
190 for (
unsigned i=0; i<sz; i++)
191 ara[i] = arb[i] + arc[i];
199 unsigned sz = args->
sz;
200 double* ara = args->
ara;
201 double* arb = args->
arb;
202 for (
unsigned i=0; i<sz; i++)
203 ara[i] = ara[i] + arb[i];
212 unsigned sz = args->
sz;
213 double* arb = args->
arb;
214 double* arc = args->
arc;
216 for (
unsigned i=0; i<sz; i++)
217 res += arb[i] * arc[i];
226 unsigned sz = args->
sz;
227 double* arb = args->
arb;
228 double* arc = args->
arc;
230 for (
unsigned i=0; i<sz; i++)
231 res += arb[0] * arc[0];
250 #define GIGA 1000000000 257 int main(
int argc,
char *argv[])
261 cerr <<
"Error when parsing the arguments!" << endl;
273 #define VERBOSE(level,...) if (level <= verbose) cout << __VA_ARGS__ 276 std::cout <<
"- Arguments -----------------------" << std::endl;
278 std::cout <<
"-----------------------------------" << std::endl;
282 #define MEGABYTE (1024*1024) 284 cout <<
"Cleaning memory: " << cm.
get_value() <<
" MB ...";
286 cout <<
"[Done]" << endl;
292 double* ara =
new double[sz];
293 double* arb =
new double[sz];
294 double* arc =
new double[sz];
323 cout <<
"Array mul performance : " <<
gflops(et_mul,sz) << endl;
324 cout <<
"Array imul performance : " <<
gflops(et_imul,sz) << endl;
325 cout <<
"Array add performance : " <<
gflops(et_add,sz) << endl;
326 cout <<
"Array acc performance : " <<
gflops(et_acc,sz) << endl;
327 cout <<
"Array mul red performance : " <<
gflops(et_mulred,sz) << endl;
328 cout <<
"Fake array mul performance: " <<
gflops(et_mulsingle,sz) << endl;
Contains a class to record running statistics on CSV tables.
void clean_mem(unsigned sz)
RunStatsTable mul_rst("-mul_rdt", "Array multiply statistics raw data table")
void thread_map1(T *array, T(*map1)(T), unsigned sz, unsigned nthreads)
clarg::argInt num_threads("-ntdec", "Number of threads to decompose in TPZParFrontStructMatrix.", 6)
RunStatsTable acc_rst("-acc_rdt", "Array accumulate statistics raw data table")
void values(ostream &os, bool defined_only)
clarg::argInt moda("-moda", "modulo a", 0)
clarg::argInt modb("-modb", "modulo b", 0)
clarg::argBool h("-h", "help message", false)
clarg::argInt cm("-cm", "clean memory before execution", 512)
void profile(double *ara, double *arb, double *arc, unsigned sz, unsigned num_threads, void *(*fun)(void *), ElapsedTimeRunStat &et, RunStatsTable &rst)
void * thread_map1_worker(void *arg)
void help(const char *prg)
double getElapsedMS() const
clarg::argInt verb_level("-v", "verbosity level", 0)
int parse_arguments(int argc, char *argv[])
RunStatsTable imul_rst("-imul_rdt", "Array immeditate multiply statistics raw data table")
RunStatsTable mulred_rst("-mulred_rdt", "Array multiply and reduce statistics raw data table")
clarg::argInt asz("-asz", "array size", 10000000)
void mul_arr_rev(double *ara, double *arb, double *arc, unsigned sz, unsigned threads)
RunStatsTable add_rst("-add_rdt", "Array add statistics raw data table")
int main(int argc, char *argv[])
void arguments_descriptions(ostream &os, string prefix, string suffix)
void * mulsingle_arr(void *a)
clarg::argInt modc("-modc", "modulo c", 0)
const T & get_value() const
void process_arr(double *ara, double *arb, double *arc, unsigned sz, unsigned nthreads, void *(*fun)(void *))
void * mulred_arr(void *a)
clarg::argInt nt("-nt", "number of threads", 0)
double gflops(ElapsedTimeRunStat &et, unsigned sz)
RunStatsTable mulsingle_rst("-mulsingle_rdt", "Fake array-multiply statistics raw data table")
double sqrt_dbl(double v)