NeoPZ
gflopstest.cpp
Go to the documentation of this file.
1 
8 #ifdef HAVE_CONFIG_H
9 #include <pz_config.h>
10 #endif
11 
12 #include <iostream>
13 #include <cstdlib>
14 #include <fstream>
15 #include <string>
16 
17 //#include "timing_analysis.h"
18 #include "arglib.h"
19 #include "run_stats_table.h"
20 
21 using namespace std;
22 
23 void help(const char* prg)
24 {
25  cout << "Perform an array multiplication: a[1...n] = b[1...n] x c[1...n]"
26  << endl;
27 
28  cout << "Usage: " << prg << "[-asz N] [-modb N] [-modc N] [-moda N] [-nt N]"
29  << endl << endl;
30 
31  clarg::arguments_descriptions(cout, " ", "\n");
32 }
33 
34 clarg::argInt asz("-asz", "array size", 10000000);
35 clarg::argInt moda("-moda", "modulo a", 0);
36 clarg::argInt modb("-modb", "modulo b", 0);
37 clarg::argInt modc("-modc", "modulo c", 0);
38 clarg::argInt nt("-nt", "number of threads", 0);
39 clarg::argInt verb_level("-v", "verbosity level", 0);
40 clarg::argBool h("-h", "help message", false);
41 clarg::argInt cm("-cm", "clean memory before execution", 512);
42 
43 
/**
 * Allocate a scratch buffer of roughly sz bytes, write every word of it once
 * and release it again.
 *
 * The buffer is sized for exactly the number of words the loop writes. (It
 * used to be sz/4 bytes long while sz/4 unsigned values were stored into it,
 * which overran the allocation.)
 *
 * @param sz Buffer size in bytes; rounded down to a whole number of unsigned
 *           words. If the allocation fails the function returns without
 *           touching any memory.
 */
void clean_mem(unsigned sz)
{
  const unsigned count = sz / sizeof(unsigned);

  unsigned* buffer = static_cast<unsigned*>(malloc(count * sizeof(unsigned)));
  if (!buffer)
    return;

  /* Writing through a volatile pointer keeps the compiler from discarding the
     stores to a buffer that is freed without ever being read. */
  volatile unsigned* sink = buffer;
  for (unsigned i = 0; i < count; i++)
    sink[i] = i;

  free(buffer);
}
51 
52 /* Run statistics. */
53 RunStatsTable mul_rst ("-mul_rdt", "Array multiply statistics raw data table");
54 RunStatsTable imul_rst ("-imul_rdt", "Array immeditate multiply statistics raw data table");
55 RunStatsTable mulred_rst ("-mulred_rdt", "Array multiply and reduce statistics raw data table");
56 RunStatsTable add_rst ("-add_rdt", "Array add statistics raw data table");
57 RunStatsTable acc_rst ("-acc_rdt", "Array accumulate statistics raw data table");
58 RunStatsTable mulsingle_rst ("-mulsingle_rdt", "Fake array-multiply statistics raw data table");
59 
/**
 * Work descriptor handed to each worker thread by process_arr.
 *
 * The three pointers address the first element of the thread's slice of the
 * corresponding array; sz is the number of elements in that slice.
 */
struct thread_arg_t
{
  double* ara;
  double* arb;
  double* arc;
  unsigned sz;
};
67 
68 void process_arr(double* ara, double* arb, double* arc, unsigned sz, unsigned nthreads, void* (*fun)(void*))
69 {
70  if (nthreads==0)
71  nthreads = 1;
72 
73  pthread_t *allthreads = new pthread_t[nthreads];
74  thread_arg_t *thread_args = new thread_arg_t[nthreads];
75 
76  unsigned chunk_sz = sz/nthreads;
77  unsigned start = 0;
78 
79 #define MIN_T(a,b) (a)<(b)?(a):(b)
80 
81  for (unsigned i=0; i<nthreads; i++, start+=chunk_sz) {
82  thread_args[i].ara = &(ara[start]);
83  thread_args[i].arb = &(arb[start]);
84  thread_args[i].arc = &(arc[start]);
85  thread_args[i].sz = MIN_T(chunk_sz,sz-start);
86  }
87 
88  /* Spaw threads */
89  for(unsigned i=0; i<nthreads; i++) {
90  pthread_create(&allthreads[i], NULL, fun, &(thread_args[i]));
91  }
92 
93  /* Join threads */
94  for(unsigned i=0; i<nthreads; i++) {
95  pthread_join(allthreads[i], NULL);
96  }
97 }
98 
/**
 * Work descriptor handed to each thread_map1_worker<T> thread.
 *
 * array points at the first element of the thread's slice, sub_sz is the
 * number of elements in the slice and map_func is applied to each of them.
 */
template<class T>
struct thread_map1_arg
{
  T* array;
  unsigned sub_sz;
  T (*map_func)(T);
};
106 
107 template<class T>
108 void* thread_map1_worker(void* arg)
109 {
110  thread_map1_arg<T>* args = (thread_map1_arg<T>*) arg;
111  T* array = args->array;
112  unsigned sub_sz = args->sub_sz;
113 
114  for (unsigned i=0; i<sub_sz; i++)
115  array[i] = args->map_func(array[i]);
116  return 0;
117 }
118 
119 template<class T>
120 void thread_map1(T* array, T (*map1)(T), unsigned sz, unsigned nthreads)
121 {
122  if (nthreads==0)
123  nthreads = 1;
124 
125  pthread_t *allthreads = new pthread_t[nthreads];
126  thread_map1_arg<T> *thread_args = new thread_map1_arg<T>[nthreads];
127 
128  unsigned chunk_sz = sz/nthreads;
129  unsigned start = 0;
130 
131  for (unsigned i=0; i<nthreads; i++, start+=chunk_sz) {
132  thread_args[i].array = &(array[start]);
133  thread_args[i].sub_sz = MIN_T(chunk_sz,sz-start);
134  thread_args[i].map_func = map1;
135  }
136 
137  /* Spaw threads */
138  for(unsigned i=0; i<nthreads; i++) {
139  pthread_create(&allthreads[i], NULL, thread_map1_worker<T>, &(thread_args[i]));
140  }
141 
142  /* Join threads */
143  for(unsigned i=0; i<nthreads; i++) {
144  pthread_join(allthreads[i], NULL);
145  }
146 }
147 
148 
/**
 * Element-wise product with the third array traversed backwards:
 * ara[i] = arb[i] * arc[sz-1-i] for i in [0, sz).
 *
 * @param ara     Destination array (sz elements).
 * @param arb     Left operand, read front to back.
 * @param arc     Right operand, read back to front.
 * @param sz      Number of elements; nothing is touched when it is 0.
 * @param threads Accepted but not used; the loop runs on the calling thread.
 */
void mul_arr_rev(double* ara, double* arb, double* arc, unsigned sz, unsigned threads)
{
  (void) threads;

  for (unsigned fwd = 0; fwd < sz; ++fwd) {
    const unsigned bwd = sz - 1 - fwd;
    ara[fwd] = arb[fwd] * arc[bwd];
  }
}
155 
/**
 * Return v multiplied by itself.
 *
 * NOTE(review): despite the "sqrt" in the name this computes the square, not
 * the square root - confirm which one is intended before relying on it. The
 * only visible use is the commented-out thread_map1 call in main.
 */
double sqrt_dbl(double v)
{
  return v * v;
}
157 
158 void* mul_arr(void* a)
159 {
160  thread_arg_t* args = (thread_arg_t*) a;
161  unsigned sz = args->sz;
162  double* ara = args->ara;
163  double* arb = args->arb;
164  double* arc = args->arc;
165  for (unsigned i=0; i<sz; i++)
166  ara[i] = arb[i] * arc[i];
167 
168  return 0;
169 }
170 
171 void* imul_arr(void* a)
172 {
173  thread_arg_t* args = (thread_arg_t*) a;
174  unsigned sz = args->sz;
175  double* ara = args->ara;
176 
177  for (unsigned i=0; i<sz; i++)
178  ara[i] = ara[i] * 1.9752; // Multiply by a constant
179 
180  return 0;
181 }
182 
183 void* add_arr(void* a)
184 {
185  thread_arg_t* args = (thread_arg_t*) a;
186  unsigned sz = args->sz;
187  double* ara = args->ara;
188  double* arb = args->arb;
189  double* arc = args->arc;
190  for (unsigned i=0; i<sz; i++)
191  ara[i] = arb[i] + arc[i];
192 
193  return 0;
194 }
195 
196 void* acc_arr(void* a)
197 {
198  thread_arg_t* args = (thread_arg_t*) a;
199  unsigned sz = args->sz;
200  double* ara = args->ara;
201  double* arb = args->arb;
202  for (unsigned i=0; i<sz; i++)
203  ara[i] = ara[i] + arb[i];
204 
205  return 0;
206 }
207 
208 double global_res;
209 void* mulred_arr(void* a)
210 {
211  thread_arg_t* args = (thread_arg_t*) a;
212  unsigned sz = args->sz;
213  double* arb = args->arb;
214  double* arc = args->arc;
215  double res = 0;
216  for (unsigned i=0; i<sz; i++)
217  res += arb[i] * arc[i];
218 
219  global_res = res;
220  return 0;
221 }
222 
223 void* mulsingle_arr(void* a)
224 {
225  thread_arg_t* args = (thread_arg_t*) a;
226  unsigned sz = args->sz;
227  double* arb = args->arb;
228  double* arc = args->arc;
229  double res = 0;
230  for (unsigned i=0; i<sz; i++)
231  res += arb[0] * arc[0];
232 
233  global_res = res;
234  return 0;
235 }
236 
237 void profile(double* ara, double* arb, double* arc, unsigned sz, unsigned num_threads,
238  void* (*fun)(void*), ElapsedTimeRunStat& et, RunStatsTable& rst)
239 {
240  process_arr(ara,arb,arc,sz,num_threads,fun);
241  rst.start();
242  et.start();
243  process_arr(ara,arb,arc,sz,num_threads,fun);
244  et.stop();
245  rst.stop();
246 }
247 
248 double gflops(ElapsedTimeRunStat& et, unsigned sz)
249 {
250 #define GIGA 1000000000
251 
252  double secs = (et.getElapsedMS()/1000.0);
253  double gflops = (double) sz / (secs * GIGA);
254  return gflops;
255 }
256 
257 int main(int argc, char *argv[])
258 {
259  /* Parse the arguments */
260  if (clarg::parse_arguments(argc, argv)) {
261  cerr << "Error when parsing the arguments!" << endl;
262  return 1;
263  }
264 
265  if (h.get_value() == true) {
266  help(argv[0]);
267  return 1;
268  }
269 
270  /* Verbose macro. */
271  unsigned verbose = verb_level.get_value();
272 
273 #define VERBOSE(level,...) if (level <= verbose) cout << __VA_ARGS__
274 
275  if (verbose >= 1) {
276  std::cout << "- Arguments -----------------------" << std::endl;
277  clarg::values(std::cout, false);
278  std::cout << "-----------------------------------" << std::endl;
279  }
280 
281  if (cm.was_set()) {
282 #define MEGABYTE (1024*1024)
283  unsigned sz = cm.get_value() * MEGABYTE;
284  cout << "Cleaning memory: " << cm.get_value() << " MB ...";
285  clean_mem(sz);
286  cout << "[Done]" << endl;
287  }
288 
289  unsigned sz = asz.get_value();
290 
291  /* Create arrays. */
292  double* ara = new double[sz];
293  double* arb = new double[sz];
294  double* arc = new double[sz];
295 
296  ElapsedTimeRunStat et_mul, et_imul, et_add, et_acc, et_mulred, et_mulsingle;
297 
298  profile(ara,arb,arc,sz,nt.get_value(),
299  mul_arr,et_mul,mul_rst);
300 
301  profile(ara,arb,arc,sz,nt.get_value(),
302  imul_arr,et_imul,imul_rst);
303 
304  profile(ara,arb,arc,sz,nt.get_value(),
305  add_arr,et_add,add_rst);
306 
307  profile(ara,arb,arc,sz,nt.get_value(),
308  acc_arr,et_acc,acc_rst);
309 
310  profile(ara,arb,arc,sz,nt.get_value(),
311  mulred_arr,et_mulred,mulred_rst);
312 
313  profile(ara,arb,arc,sz,nt.get_value(),
314  mulsingle_arr,et_mulsingle,mulsingle_rst);
315 
316  //compute_rev_rst.start();
317  //et_rev_mul.start();
318  //thread_map1(ara, sqrt_dbl, sz, nt.get_value());
319  //et_rev_mul.stop();
320  //compute_rev_rst.stop();
321 
322 
323  cout << "Array mul performance : " << gflops(et_mul,sz) << endl;
324  cout << "Array imul performance : " << gflops(et_imul,sz) << endl;
325  cout << "Array add performance : " << gflops(et_add,sz) << endl;
326  cout << "Array acc performance : " << gflops(et_acc,sz) << endl;
327  cout << "Array mul red performance : " << gflops(et_mulred,sz) << endl;
328  cout << "Fake array mul performance: " << gflops(et_mulsingle,sz) << endl;
329 
330  return 0; // Return ok
331 }
332 
Contains a class to record running statistics on CSV tables.
void clean_mem(unsigned sz)
Definition: gflopstest.cpp:44
RunStatsTable mul_rst("-mul_rdt", "Array multiply statistics raw data table")
void thread_map1(T *array, T(*map1)(T), unsigned sz, unsigned nthreads)
Definition: gflopstest.cpp:120
clarg::argInt num_threads("-ntdec", "Number of threads to decompose in TPZParFrontStructMatrix.", 6)
list threads
Definition: test.py:140
RunStatsTable acc_rst("-acc_rdt", "Array accumulate statistics raw data table")
void values(ostream &os, bool defined_only)
Definition: arglib.cpp:183
clarg::argInt moda("-moda", "modulo a", 0)
clarg::argInt modb("-modb", "modulo b", 0)
void * mul_arr(void *a)
Definition: gflopstest.cpp:158
clarg::argBool h("-h", "help message", false)
clarg::argInt cm("-cm", "clean memory before execution", 512)
void profile(double *ara, double *arb, double *arc, unsigned sz, unsigned num_threads, void *(*fun)(void *), ElapsedTimeRunStat &et, RunStatsTable &rst)
Definition: gflopstest.cpp:237
void * thread_map1_worker(void *arg)
Definition: gflopstest.cpp:108
void help(const char *prg)
Definition: gflopstest.cpp:23
double global_res
Definition: gflopstest.cpp:208
double getElapsedMS() const
#define MIN_T(a, b)
#define GIGA
int nthreads
Definition: numatst.cpp:315
clarg::argInt verb_level("-v", "verbosity level", 0)
int parse_arguments(int argc, char *argv[])
Definition: arglib.cpp:195
RunStatsTable imul_rst("-imul_rdt", "Array immediate multiply statistics raw data table")
RunStatsTable mulred_rst("-mulred_rdt", "Array multiply and reduce statistics raw data table")
clarg::argInt asz("-asz", "array size", 10000000)
string res
Definition: test.py:151
void * add_arr(void *a)
Definition: gflopstest.cpp:183
double * ara
Definition: gflopstest.cpp:62
void mul_arr_rev(double *ara, double *arb, double *arc, unsigned sz, unsigned threads)
Definition: gflopstest.cpp:149
double * arb
Definition: gflopstest.cpp:63
RunStatsTable add_rst("-add_rdt", "Array add statistics raw data table")
int main(int argc, char *argv[])
Definition: gflopstest.cpp:257
void arguments_descriptions(ostream &os, string prefix, string suffix)
Definition: arglib.cpp:189
void * mulsingle_arr(void *a)
Definition: gflopstest.cpp:223
clarg::argInt modc("-modc", "modulo c", 0)
double * arc
Definition: gflopstest.cpp:64
int verbose
Definition: decompose.cpp:67
bool was_set() const
Definition: arglib.h:138
void * imul_arr(void *a)
Definition: gflopstest.cpp:171
const T & get_value() const
Definition: arglib.h:177
void process_arr(double *ara, double *arb, double *arc, unsigned sz, unsigned nthreads, void *(*fun)(void *))
Definition: gflopstest.cpp:68
void * mulred_arr(void *a)
Definition: gflopstest.cpp:209
clarg::argInt nt("-nt", "number of threads", 0)
double gflops(ElapsedTimeRunStat &et, unsigned sz)
Definition: gflopstest.cpp:248
RunStatsTable mulsingle_rst("-mulsingle_rdt", "Fake array-multiply statistics raw data table")
unsigned sz
Definition: gflopstest.cpp:65
double sqrt_dbl(double v)
Definition: gflopstest.cpp:156
#define MEGABYTE
void * acc_arr(void *a)
Definition: gflopstest.cpp:196