15 #include "pzbfilestream.h" 22 #endif // USING_LIBNUMA 27 static LoggerPtr loggerconverge(Logger::getLogger(
"pz.converge"));
28 static LoggerPtr logger(Logger::getLogger(
"main"));
38 #include <sys/resource.h> 42 #include "tbb/task_scheduler_init.h" 53 cout <<
"Compute the Decompose_LDLt method for the matrix" << endl;
55 cout <<
"Usage: " << prg <<
"-if file [-v verbose_level] [-b] " 56 <<
"[-tot_rdt rdt_file] [-op matrix_operation] [-h]" << endl << endl;
57 cout <<
"matrix_operation:" << endl;
58 cout <<
" 0: Decompose_LDLt()" << endl;
59 cout <<
" 1: Decompose_LDLt2() -- deprecated (not working)" << endl;
60 cout <<
" 2: Decompose_Cholesky()" << endl;
64 clarg::argString ifn(
"-ifn",
"input matrix file name (use -bi to read from binary files)",
"matrix.txt");
69 #define VERBOSE(level,...) if (level <= verbose) cout << __VA_ARGS__ 72 clarg::argBool br(
"-br",
"binary reference. Reference decomposed matrix file format == binary.",
false);
73 clarg::argBool bi(
"-bi",
"binary input. Input file format == binary.",
false);
78 clarg::argInt maxcol(
"-maxcol",
"Limit computation to max column (Use Resize(maxcol)).", 0);
79 clarg::argString gen_dm_sig(
"-gen_dm_md5",
"generates MD5 signature for decomposed matrix into file.",
"decomposed_matrix.md5");
80 clarg::argString chk_dm_sig(
"-chk_dm_md5",
"compute MD5 signature for decomposed matrix and check against MD5 at file.",
"decomposed_matrix.md5");
81 clarg::argString chk_dm_error(
"-chk_dm_error",
"check the decomposed matrix error against a reference matrix. (use -br to read from binary files)",
"ref_decomposed_matrix.txt");
88 clarg::argBool nats(
"-nats",
"NUMA aware thread scheduling.",
false);
90 unsigned num_nodes = 0;
97 "Whole program (total) statistics raw data table");
141 std::vector< TPZAutoPointer<TPZSkylMatrix<REAL> > >
matrices;
144 cpu_set_t dies_mask_array[8];
145 cpu_set_t mask_core0;
148 cpu_set_t mask_proc0;
149 cpu_set_t mask_oddcores;
150 cpu_set_t mask_evencores;
/**
 * Prints the first 64 CPU slots of an affinity mask to standard output.
 *
 * Each slot is written as the character '1' when CPU_ISSET reports the CPU
 * as a member of the set and '0' otherwise. No separator and no trailing
 * newline are emitted; callers append their own line ending.
 *
 * @param mask CPU set to display.
 */
void print_mask(cpu_set_t* mask)
{
    for (int i = 0; i < 64; i++) {
        // The conditional must be parenthesized: `<<` binds tighter than
        // `?:`, so without the parentheses the raw CPU_ISSET value would be
        // streamed and the "1"/"0" selection would be evaluated and discarded.
        std::cout << (CPU_ISSET(i, mask) ? "1" : "0");
    }
}
173 #define SET_RANGE(mskp,start,end) CPU_ZERO(mskp); \ 174 for (int i=start; i<end; i++) CPU_SET(i,mskp) 190 CPU_ZERO(&mask_oddcores);
191 CPU_ZERO(&mask_evencores);
192 for(
int i=0; i<64; i+=2) {
193 CPU_SET(i,&mask_evencores);
194 CPU_SET(i+1,&mask_oddcores);
198 cout <<
"mask core 0 : "; print_mask(&mask_core0); cout << endl;
199 cout <<
"mask core L2 0 : "; print_mask(&mask_L20); cout << endl;
200 cout <<
"mask core die 0 : "; print_mask(&mask_die0); cout << endl;
201 cout <<
"mask core proc 0: "; print_mask(&mask_proc0); cout << endl;
202 cout <<
"mask evencores : "; print_mask(&mask_evencores); cout << endl;
203 cout <<
"mask oddcores : "; print_mask(&mask_oddcores); cout << endl;
212 cpu_set_t* msk = NULL;
216 msk = dies_mask_array + (tidx%8);
236 msk = &mask_evencores;
240 msk = &mask_oddcores;
246 VERBOSE(2,
"Warning: -af " << af
247 <<
" has not been defined. Not setting affinity");
253 cout <<
"Thread " << tidx <<
" affinity mask = ";
258 sched_setaffinity(0,
sizeof(cpu_set_t), msk);
264 int64_t idx = (int64_t) m;
273 if (nats.was_set()) {
274 struct bitmask* nodemask = numa_allocate_nodemask();
275 numa_bitmask_clearall(nodemask);
276 numa_bitmask_setbit(nodemask,idx%num_nodes);
278 numa_free_nodemask(nodemask);
282 int cpuid = sched_getcpu();
284 cout <<
"Thread " << idx <<
" at cpu " << cpuid << endl;
293 #define CASE_OP(opid,method) \ 301 std::cerr <<
"ERROR: deprecated operation -- decompose LDLt2 is no longer implemented." << std::endl;
303 CASE_OP(2,Decompose_Cholesky());
306 std::cerr <<
"ERROR: Invalid matrix operation type." << std::endl;
314 int main(
int argc,
char *argv[])
317 task_scheduler_init init;
326 cerr <<
"Error when parsing the arguments!" << endl;
338 cerr <<
"Error, nmats must be >= 1" << endl;
343 std::cout <<
"- Arguments -----------------------" << std::endl;
345 std::cout <<
"-----------------------------------" << std::endl;
354 matrix.
Read(input_file,0);
356 <<
" [DONE]" << std::endl);
362 std::vector<pthread_t>
threads(nthreads);
365 VERBOSE(1,
"Copying matrices" << endl);
368 num_nodes = numa_max_node()+1;
369 cout <<
"Max nodes = " << num_nodes << endl;
371 struct bitmask* nodemask = numa_allocate_nodemask();
373 numa_bitmask_clearall(nodemask);
374 unsigned node = t%num_nodes;
375 numa_bitmask_setbit(nodemask,node);
376 numa_set_membind(nodemask);
378 matrices.push_back(mp);
380 numa_bitmask_setall(nodemask);
381 numa_set_membind(nodemask);
382 numa_free_nodemask(nodemask);
387 matrices.push_back(mp);
394 matrices.push_back(mp);
398 VERBOSE(1,
"Copying matrices [DONE]" << endl);
405 (
void*) t, __FUNCTION__);
421 unsigned n = matrix.
Dim();
422 uint64_t n_sky_items = 0;
423 uint64_t max_height = 0;
424 for (
unsigned i=0; i<n; i++) {
427 cout <<
"col " << i <<
" height = " << height << endl;
429 n_sky_items += height;
430 if (height > max_height) max_height = height;
433 double av_height = (double) n_sky_items / (
double) n;
434 cout <<
"N = " << n << endl;
435 cout <<
"N^2 = " << n2 << endl;
436 cout <<
"Sky items = " << n_sky_items << endl;
437 cout <<
"N^2 / Sky items = " << (double) n2 / (
double) n_sky_items << endl;
438 cout <<
"Avg. Height = " << av_height << endl;
439 cout <<
"Max. Height = " << max_height << endl;
444 VERBOSE(1,
"Dumping decomposed matrix into: " <<
448 matrix.
Write(dump_file, 0);
454 matrix.
Write(sig, 1);
458 cerr <<
"ERROR(ret=" << ret <<
") : MD5 Signature for " 459 <<
"decomposed matrixdoes not match." << endl;
463 cout <<
"Checking decomposed matrix MD5 signature: [OK]" << endl;
468 cerr <<
"ERROR (ret=" << ret <<
") when writing the " 469 <<
"decomposed matrix MD5 signature to file: " 480 VERBOSE(1,
"Checking decomposed matrix error: " <<
486 ref_matrix.
Read(ref_file,0);
487 int max_j = matrix.
Cols();
488 if (max_j != ref_matrix.
Cols()) {
489 cerr <<
"Decomposed matrix has " << max_j
490 <<
" cols while reference matrix has " 491 << ref_matrix.
Cols() << endl;
495 REAL max_error = 0.0;
496 for (
int j=0; j<max_j; j++) {
498 if (col_height != ref_matrix.
SkyHeight(j)) {
499 cerr <<
"Column " << j <<
" of decomposed matrix has " << col_height
500 <<
" non zero rows while reference matrix has " 504 int min_i = (j+1) - col_height;
505 for (
int i=min_i; i<=j; i++) {
507 REAL dm_ij = matrix.
s(i,j);
508 REAL rm_ij = ref_matrix.
s(i,j);
509 if (dm_ij != rm_ij) {
510 REAL diff =
abs(dm_ij - rm_ij);
511 if (diff >= error_tolerance) {
512 VERBOSE(1,
"diff(" << diff <<
") tolerance (" << error_tolerance
513 <<
"). dm[" << i <<
"][" << j <<
"] (" << dm_ij
514 <<
") != rm[" << i <<
"][" << j <<
"] (" << rm_ij
517 max_error = (max_error < diff)?diff:max_error;
523 cerr <<
"Error ("<< max_error <<
") > error tolerance (" 524 << error_tolerance <<
")" << endl;
clarg::argString dump_dm("-dump_dm", "dump decomposed matrix. (use -bd for binary format)", "dump_matrix.txt")
Contains a class to record running statistics on CSV tables.
void set_affinity(int af, int tidx)
int Resize(const int64_t newDim, const int64_t) override
Resizes the matrix, keeping the previous values.
RunStatsTable total_rst("-tot_rdt", "Whole program (total) statistics raw data table")
clarg::argString chk_dm_sig("-chk_dm_md5", "compute MD5 signature for decomposed matrix and check against MD5 at file.", "decomposed_matrix.md5")
Contains the definitions of LOGPZ_DEBUG, LOGPZ_INFO, LOGPZ_WARN, LOGPZ_ERROR and LOGPZ_FATAL, and the implementation of the inline InitializePZLOG(string) function, with or without the log4cxx library. It must be called outside the "#ifdef LOG4CXX" scope.
Contains declaration of the TPZMD5Stream class which implements the interface to write and check md5 ...
clarg::argInt nmats("-nmats", "Number of matrizes to decompose simultaneously.", 1)
void values(ostream &os, bool defined_only)
void OpenRead(const std::string &fn)
void help(const char *prg)
clarg::argBool br("-br", "binary reference. Reference decomposed matrix file format == binary.", false)
void Write(TPZStream &buf, int withclassid) const override
Packs the object structure in a stream of bytes.
clarg::argBool bi("-bi", "binary input. Input file format == binary.", false)
int64_t SkyHeight(int64_t col)
Returns the height of the skyline for a given column.
clarg::argBool h("-h", "help message", false)
TinyFad< 8, T > abs(const TinyFad< 8, T > &in)
void OpenWrite(const std::string &fn)
clarg::argInt verb_level("-v", "verbosity level", 0)
#define VERBOSE(level,...)
void compute_decompose(int idx)
Implements a skyline storage format. A skyline matrix is symmetric and therefore square. Matrix.
TPZSkylMatrix< REAL > matrix
clarg::argString ifn("-ifn", "input matrix file name (use -bi to read from binary files)", "matrix.txt")
#define PZ_PTHREAD_JOIN(thread, val, fn)
virtual TVar & s(const int64_t row, const int64_t col) override
The operators check on the bounds if the DEBUG variable is defined.
clarg::argBool naa("-naDALora", "NUMA aware Dohrman Assembly List thread work objects re-allocation.", false)
clarg::argString mp("-mp", "starts execution from beginning - read a \alha_predio\input file", "../8andares02.txt")
clarg::argInt mstats("-mstats", "Matrix statistics vebosity level.", 0)
int CheckMD5(const std::string &filename)
Checks the stream's MD5 signature against the MD5 signature stored in a file.
clarg::argInt affinity("-af", "affinity mode (0=no affinity, 1=heuristi 1)", 0)
Implements reading from and writing to an ascii file. Persistency.
clarg::argBool rea("-rea", "reallocate matrix inside matrix.", false)
int parse_arguments(int argc, char *argv[])
#define SET_RANGE(mskp, start, end)
Contains TPZSkyline class which implements a skyline storage format.
void arguments_descriptions(ostream &os, string prefix, string suffix)
std::vector< TPZAutoPointer< TPZSkylMatrix< REAL > > > matrices
int main(int argc, char *argv[])
virtual int64_t Dim() const
Returns the dimension of the matrix if the matrix is square.
#define CASE_OP(opid, method)
clarg::argString chk_dm_error("-chk_dm_error", "check the decomposed matrix error against a reference matrix. (use -br to read from binary files)", "ref_decomposed_matrix.txt")
void Read(TPZStream &buf, void *context) override
Unpacks the object structure from a stream of bytes.
int WriteMD5(const std::string &filename)
Write computed MD5 signature to file.
FileStreamWrapper(bool b)
clarg::argInt maxcol("-maxcol", "Limit computation to max column (Use Resize(maxcol)).", 0)
clarg::argString gen_dm_sig("-gen_dm_md5", "generates MD5 signature for decomposed matrix into file.", "decomposed_matrix.md5")
const T & get_value() const
clarg::argBool bd("-bd", "binary dump. Dump file format == binary.", false)
#define PZ_PTHREAD_CREATE(thread, attr, routine, args, fn)
int64_t Cols() const
Returns number of cols.
Defines the interface for saving and reading data. Persistency.
clarg::argString m("-m", "input matrix file name (text format)", "matrix.txt")
clarg::argDouble error_tol("-error_tol", "error tolerance.", 1.e-12)
clarg::argInt cholesky_blk("-chol_blk", "Cholesky blocking factor", 256)
void ReallocForNuma(int node)
clarg::argInt mop("-op", "Matrix operation", 1)
Implements the interface to write and check MD5 files. Persistency.
This class implements a reference counter mechanism to administer a dynamically allocated object...