15 #include "pzbfilestream.h" 24 static LoggerPtr loggerconverge(Logger::getLogger(
"pz.converge"));
25 static LoggerPtr logger(Logger::getLogger(
"main"));
35 #include <sys/resource.h> 39 #include "tbb/task_scheduler_init.h" 48 cout <<
"Compute the Decompose_LDLt method for the matrix" << endl;
50 cout <<
"Usage: " << prg <<
"-if file [-v verbose_level] [-b] " 51 <<
"[-tot_rdt rdt_file] [-op matrix_operation] [-h]" << endl << endl;
52 cout <<
"matrix_operation:" << endl;
53 cout <<
" 0: Decompose_LDLt()" << endl;
54 cout <<
" 1: Decompose_LDLt2() -- deprecated (not working)" << endl;
55 cout <<
" 2: Decompose_Cholesky()" << endl;
59 clarg::argString ifn(
"-ifn",
"input matrix file name (use -bi to read from binary files)",
"matrix.txt");
64 #define VERBOSE(level,...) if (level <= verbose) cout << __VA_ARGS__ 67 clarg::argBool br(
"-br",
"binary reference. Reference decomposed matrix file format == binary.",
false);
68 clarg::argBool bi(
"-bi",
"binary input. Input file format == binary.",
false);
73 clarg::argInt maxcol(
"-maxcol",
"Limit computation to max column (Use Resize(maxcol)).", 0);
74 clarg::argString gen_dm_sig(
"-gen_dm_md5",
"generates MD5 signature for decomposed matrix into file.",
"decomposed_matrix.md5");
75 clarg::argString chk_dm_sig(
"-chk_dm_md5",
"compute MD5 signature for decomposed matrix and check against MD5 at file.",
"decomposed_matrix.md5");
76 clarg::argString chk_dm_error(
"-chk_dm_error",
"check the decomposed matrix error against a reference matrix. (use -br to read from binary files)",
"ref_decomposed_matrix.txt");
83 "Whole program (total) statistics raw data table");
130 cpu_set_t dies_mask_array[8];
131 cpu_set_t mask_core0;
134 cpu_set_t mask_proc0;
135 cpu_set_t mask_oddcores;
136 cpu_set_t mask_evencores;
139 void print_mask(cpu_set_t* mask)
141 for (
int i=0; i<64; i++) {
142 cout << CPU_ISSET(i, mask)?
"1":
"0";
163 #define SET_RANGE(mskp,start,end) CPU_ZERO(mskp); \ 164 for (int i=start; i<end; i++) CPU_SET(i,mskp) 181 CPU_ZERO(&mask_oddcores);
182 CPU_ZERO(&mask_evencores);
183 for(
int i=0; i<64; i+=2) {
184 CPU_SET(i,&mask_evencores);
185 CPU_SET(i+1,&mask_oddcores);
189 cout <<
"mask core 0 : "; print_mask(&mask_core0); cout << endl;
190 cout <<
"mask core L2 0 : "; print_mask(&mask_L20); cout << endl;
191 cout <<
"mask core die 0 : "; print_mask(&mask_die0); cout << endl;
192 cout <<
"mask core proc 0: "; print_mask(&mask_proc0); cout << endl;
193 cout <<
"mask evencores : "; print_mask(&mask_evencores); cout << endl;
194 cout <<
"mask oddcores : "; print_mask(&mask_oddcores); cout << endl;
205 cpu_set_t* msk = NULL;
209 msk = dies_mask_array + (tidx%8);
229 msk = &mask_evencores;
233 msk = &mask_oddcores;
239 VERBOSE(2,
"Warning: -af " << af
240 <<
" has not been defined. Not setting affinity");
246 cout <<
"Thread " << tidx <<
" affinity mask = ";
251 sched_setaffinity(0,
sizeof(cpu_set_t), msk);
273 #define CASE_OP(opid,method) \ 281 std::cerr <<
"ERROR: deprecated operation -- decompose LDLt2 is no longer implemented." << std::endl;
283 CASE_OP(2,Decompose_Cholesky());
286 std::cerr <<
"ERROR: Invalid matrix operation type." << std::endl;
297 {start_time = getms();}
299 {stop_time = getms();}
307 gettimeofday(&t,NULL);
308 return (t.tv_sec*1000) + (t.tv_usec/1000);
318 pthread_cond_t
cond=PTHREAD_COND_INITIALIZER;
331 parallel_routine=NULL;
333 nthreads_initialized = 0;
334 wait_for_all_init =
true;
337 void execute_n_threads(
unsigned n,
338 void (*init_routine)(
int),
339 void (*parallel_routine)(
int));
344 pthread_mutex_t* mt, pthread_cond_t* cd,
345 pthread_cond_t* mcd) :
346 tid(t), init_routine(ir), parallel_routine(pr),
347 glob_mutex(mt), cond(cd), main_cond(mcd)
350 void (*init_routine)(int);
351 void (*parallel_routine)(int);
360 void (*init_routine)(int);
362 void (*parallel_routine)(int);
377 VERBOSE(1,
"Thread " << tid <<
" calling init routine on CPU " 378 << (
int) sched_getcpu() << endl);
382 nthreads_initialized++;
383 if (nthreads_initialized == nthreads) {
384 wait_for_all_init =
false;
390 while (!run_parallel) {
394 VERBOSE(1,
"Thread " << tid <<
" calling parallel routine on CPU " 395 << (
int) sched_getcpu() << endl);
400 thread_timer[tid].start();
406 thread_timer[tid].stop();
413 void (*init_routine)(
int),
414 void (*parallel_routine)(
int))
417 nthreads_initialized = 0;
419 thread_timer.resize(nthreads);
423 &glob_mutex, &cond, &main_cond);
428 pthread_mutex_lock(&main_mutex);
429 while (wait_for_all_init) {
430 pthread_cond_wait(&main_cond, &main_mutex);
432 pthread_mutex_unlock(&main_mutex);
437 pthread_cond_broadcast(&cond);
440 for (
unsigned i=0; i<
nthreads; i++) {
446 printf(
"%7s,%10s,%10s,%10s\n",
"thread",
"elapsed",
"start",
"stop");
447 for (
unsigned i=0; i<
nthreads; i++) {
448 printf(
"%7d,%10lld,%10lld,%10lld\n", i,
449 thread_timer[i].get_elapsed(),
450 thread_timer[i].get_start(),
451 thread_timer[i].get_stop());
457 int main(
int argc,
char *argv[])
460 task_scheduler_init init;
466 cerr <<
"Error when parsing the arguments!" << endl;
478 cerr <<
"Error, nmats must be >= 1" << endl;
483 std::cout <<
"- Arguments -----------------------" << std::endl;
485 std::cout <<
"-----------------------------------" << std::endl;
494 matrix.
Read(input_file,0);
496 <<
" [DONE]" << std::endl);
507 unsigned n = matrix.
Dim();
508 uint64_t n_sky_items = 0;
509 uint64_t max_height = 0;
510 for (
unsigned i=0; i<n; i++) {
513 cout <<
"col " << i <<
" height = " << height << endl;
515 n_sky_items += height;
516 if (height > max_height) max_height = height;
519 double av_height = (double) n_sky_items / (
double) n;
520 cout <<
"N = " << n << endl;
521 cout <<
"N^2 = " << n2 << endl;
522 cout <<
"Sky items = " << n_sky_items << endl;
523 cout <<
"N^2 / Sky items = " << (double) n2 / (
double) n_sky_items << endl;
524 cout <<
"Avg. Height = " << av_height << endl;
525 cout <<
"Max. Height = " << max_height << endl;
530 VERBOSE(1,
"Dumping decomposed matrix into: " <<
534 matrix.
Write(dump_file, 0);
540 matrix.
Write(sig, 1);
544 cerr <<
"ERROR(ret=" << ret <<
") : MD5 Signature for " 545 <<
"decomposed matrixdoes not match." << endl;
549 cout <<
"Checking decomposed matrix MD5 signature: [OK]" << endl;
554 cerr <<
"ERROR (ret=" << ret <<
") when writing the " 555 <<
"decomposed matrix MD5 signature to file: " 566 VERBOSE(1,
"Checking decomposed matrix error: " <<
572 ref_matrix.
Read(ref_file,0);
573 int max_j = matrix.
Cols();
574 if (max_j != ref_matrix.
Cols()) {
575 cerr <<
"Decomposed matrix has " << max_j
576 <<
" cols while reference matrix has " 577 << ref_matrix.
Cols() << endl;
581 REAL max_error = 0.0;
582 for (
int j=0; j<max_j; j++) {
584 if (col_height != ref_matrix.
SkyHeight(j)) {
585 cerr <<
"Column " << j <<
" of decomposed matrix has " << col_height
586 <<
" non zero rows while reference matrix has " 590 int min_i = (j+1) - col_height;
591 for (
int i=min_i; i<=j; i++) {
593 REAL dm_ij = matrix.
s(i,j);
594 REAL rm_ij = ref_matrix.
s(i,j);
595 if (dm_ij != rm_ij) {
596 REAL diff =
abs(dm_ij - rm_ij);
597 if (diff >= error_tolerance) {
598 VERBOSE(1,
"diff(" << diff <<
") tolerance (" << error_tolerance
599 <<
"). dm[" << i <<
"][" << j <<
"] (" << dm_ij
600 <<
") != rm[" << i <<
"][" << j <<
"] (" << rm_ij
603 max_error = (max_error < diff)?diff:max_error;
609 cerr <<
"Error ("<< max_error <<
") > error tolerance (" 610 << error_tolerance <<
")" << endl;
Contains a class to record running statistics on CSV tables.
void set_affinity(int af, int tidx)
thread_arg_t(int t, void(*ir)(int), void(*pr)(int), pthread_mutex_t *mt, pthread_cond_t *cd, pthread_cond_t *mcd)
pthread_cond_t * main_cond
int Resize(const int64_t newDim, const int64_t) override
Resizes the matrix, keeping the previous values.
void(* parallel_routine)(int)
Contains the definitions of LOGPZ_DEBUG, LOGPZ_INFO, LOGPZ_WARN, LOGPZ_ERROR and LOGPZ_FATAL, and the implementation of the inline InitializePZLOG(string) function, with or without the log4cxx library. It must be called outside the "#ifdef LOG4CXX" scope.
Contains declaration of the TPZMD5Stream class which implements the interface to write and check md5 ...
clarg::argString chk_dm_sig("-chk_dm_md5", "compute MD5 signature for decomposed matrix and check against MD5 at file.", "decomposed_matrix.md5")
void values(ostream &os, bool defined_only)
std::vector< thread_timer_t > thread_timer
void OpenRead(const std::string &fn)
clarg::argString gen_dm_sig("-gen_dm_md5", "generates MD5 signature for decomposed matrix into file.", "decomposed_matrix.md5")
void init_decompose(int idx)
#define VERBOSE(level,...)
void Write(TPZStream &buf, int withclassid) const override
Packs the object structure in a stream of bytes.
clarg::argDouble error_tol("-error_tol", "error tolerance.", 1.e-12)
int64_t SkyHeight(int64_t col)
Returns the height of the skyline for a given column.
clarg::argBool h("-h", "help message", false)
TinyFad< 8, T > abs(const TinyFad< 8, T > &in)
void OpenWrite(const std::string &fn)
clarg::argInt maxcol("-maxcol", "Limit computation to max column (Use Resize(maxcol)).", 0)
clarg::argString dump_dm("-dump_dm", "dump decomposed matrix. (use -bd for binary format)", "dump_matrix.txt")
clarg::argBool br("-br", "binary reference. Reference decomposed matrix file format == binary.", false)
clarg::argString ifn("-ifn", "input matrix file name (use -bi to read from binary files)", "matrix.txt")
void compute_decompose(int idx)
Implements a skyline storage format. A skyline matrix is symmetric and therefore square. Matrix.
pthread_mutex_t main_mutex
TPZSkylMatrix< REAL > matrix
void execute_n_threads(unsigned n, void(*init_routine)(int), void(*parallel_routine)(int))
RunStatsTable total_rst("-tot_rdt", "Whole program (total) statistics raw data table")
void help(const char *prg)
#define PZ_PTHREAD_JOIN(thread, val, fn)
int main(int argc, char *argv[])
std::vector< pthread_t > threads
virtual TVar & s(const int64_t row, const int64_t col) override
The operators check the bounds if the DEBUG variable is defined.
pthread_mutex_t glob_mutex
clarg::argInt nmats("-nmats", "Number of matrizes to decompose simultaneously.", 1)
int CheckMD5(const std::string &filename)
Checks the stream's MD5 signature against the MD5 signature stored in the file.
clarg::argBool copy_matrix_inside_thread("-cot", "copy on thread - copy matrix inside thread.", false)
Implements reading from and writing to an ascii file. Persistency.
clarg::argString chk_dm_error("-chk_dm_error", "check the decomposed matrix error against a reference matrix. (use -br to read from binary files)", "ref_decomposed_matrix.txt")
int parse_arguments(int argc, char *argv[])
clarg::argBool bi("-bi", "binary input. Input file format == binary.", false)
std::vector< TPZSkylMatrix< REAL > *> matrices
#define SET_RANGE(mskp, start, end)
clarg::argInt affinity("-af", "affinity mode (0=no affinity, 1=heuristi 1)", 0)
clarg::argInt mstats("-mstats", "Matrix statistics vebosity level.", 0)
void * threadfunc(void *parm)
Contains TPZSkyline class which implements a skyline storage format.
void arguments_descriptions(ostream &os, string prefix, string suffix)
void(* init_routine)(int)
virtual int64_t Dim() const
Returns the dimension of the matrix if the matrix is square.
#define CASE_OP(opid, method)
clarg::argInt verb_level("-v", "verbosity level", 0)
void Read(TPZStream &buf, void *context) override
Unpacks the object structure from a stream of bytes.
int WriteMD5(const std::string &filename)
Write computed MD5 signature to file.
FileStreamWrapper(bool b)
const T & get_value() const
#define PZ_PTHREAD_CREATE(thread, attr, routine, args, fn)
int64_t Cols() const
Returns number of cols.
clarg::argBool bd("-bd", "binary dump. Dump file format == binary.", false)
Defines the interface for saving and reading data. Persistency.
clarg::argInt mop("-op", "Matrix operation", 1)
pthread_mutex_t * glob_mutex
clarg::argInt cholesky_blk("-chol_blk", "Cholesky blocking factor", 256)
Implements the interface to write and check MD5 files. Persistency.