39 static LoggerPtr logger(Logger::getLogger(
"structmatrix.dohrstructmatrix"));
40 static LoggerPtr loggerasm(Logger::getLogger(
"structmatrix.dohrstructmatrix.asm"));
50 #include <tbb/parallel_for.h> 51 #include <tbb/blocked_range.h> 60 static float stiff_sum = 0;
70 pthread_mutex_t* TestThread);
119 TPZBoostGraph boost(nel,nindep);
120 boost.setGType(TPZBoostGraph::KMC);
121 boost.SetElementGraph(elgraph, elgraphindex);
122 boost.CompressedResequence(perm, iperm);
133 for(isub=0; isub<
nsub; isub++)
137 std::cout <<
'.'; std::cout.flush();
147 int64_t nel = elgraphindex.
NElements()-1;
149 TPZBoostGraph boost(nel,nindep);
150 boost.setGType(TPZBoostGraph::KMC);
151 boost.SetElementGraph(elgraph, elgraphindex);
152 boost.CompressedResequence(perm, iperm);
162 filename <<
"SubMatrix" << submesh->
Index() <<
".vtk";
172 std::cout <<
"Identifying corner nodes\n";
190 assembly->fFineEqs.Resize(nsub);
191 assembly->fCoarseEqs.Resize(nsub);
192 for(isub=0; isub<
nsub; isub++)
201 int64_t neq = ((
TPZCompMesh *)submesh)->NEquations();
204 substruct->fNEquations = neq;
208 std::map<int,int> globinv,globaleqs;
212 int next = globaleqs.size();
213 substruct->fNumExternalEquations = next;
215 assembly->fFineEqs[isub].Resize(next);
216 std::map<int,int>::iterator it;
218 for(it=globaleqs.begin(); it!=globaleqs.end(); it++)
220 assembly->fFineEqs[isub][count++] = it->second;
226 typedef std::pair<ENumbering,ENumbering> Numberingpair;
227 ENumbering tsub,text,tint;
232 TPZVec<int> &toexternal = substruct->fPermutationsScatter[Numberingpair(tsub,text)];
233 TPZVec<int> &fromexternal = substruct->fPermutationsScatter[Numberingpair(text,tsub)];
234 toexternal.
Resize(neq,-1);
235 fromexternal.
Resize(neq,-1);
236 int nel = globaleqs.size();
238 for(it=globaleqs.begin(); it!=globaleqs.end(); it++)
240 toexternal[it->first] = count++;
244 for(ieq=0; ieq<neq; ieq++)
246 if(toexternal[ieq] == -1) toexternal[ieq] = count++;
248 for(ieq=0; ieq<neq; ieq++)
250 fromexternal[toexternal[ieq]] = ieq;
276 unsigned numthreads_assemble,
unsigned numthreads_decompose)
279 Assemble(*dohrgeneric, rhs, guiInterface, numthreads_assemble, numthreads_decompose);
294 template<
class TTVar>
298 fSubMeshIndex(submesh_idx), fSubstruct(substruct) {}
317 fAssembly(assembly), fMesh(mesh) {}
328 typename std::vector<work_item_t<TVar> >::iterator it = work_items.begin();
329 typename std::vector<work_item_t<TVar> >::iterator end = work_items.end();
331 for (;it != end; it++)
343 void operator()(
const blocked_range<size_t>& range)
const 345 for(
size_t i=range.begin(); i!=range.end(); ++i ) {
355 void run_parallel_for()
359 parallel_for(blocked_range<size_t>(0,work_items.size(), 1 ), *
this);
372 const std::list<TPZAutoPointer<TPZDohrSubstructCondense<STATE> > > &sublist = dohr->
SubStructures();
375 std::list<TPZAutoPointer<TPZDohrSubstructCondense<STATE> > >::const_iterator it = sublist.begin();
379 std::cout <<
"Assembling " << nsub <<
" submeshes" << std::endl;
380 for (isub=0; isub<
nsub ; isub++) {
382 if(!submesh)
continue;
389 parallel_tasks.run_parallel_for();
395 for (isub=0, it=sublist.begin(); it != sublist.end(); it++, isub++) {
397 (*it)->ContributeRhs(rhsloc);
435 RunStatsTable dohr_ass (
"-tpz_dohr_ass",
"Raw data table statistics for TPZDohrStructMatrix::Assemble assemble (first)");
436 RunStatsTable dohr_dec (
"-tpz_dohr_dec",
"Raw data table statistics for TPZDohrStructMatrix::Assemble decompose (second)");
441 unsigned numthreads_assemble,
unsigned numthreads_decompose)
450 const std::list<TPZAutoPointer<TPZDohrSubstructCondense<STATE> > > &sublist = dohr->
SubStructures();
453 std::list<TPZAutoPointer<TPZDohrSubstructCondense<STATE> > >::const_iterator it = sublist.begin();
457 for (
int isub=0; isub<
nsub ; isub++) {
459 if(!submesh)
continue;
473 std::list<TPZAutoPointer<ThreadDohrmanAssembly<STATE> > >::iterator itwork =
474 worklistAssemble.
fList.begin();
475 while (itwork != worklistAssemble.
fList.end()) {
482 float rtime, ptime, mflops;
484 PAPI_flops ( &rtime, &ptime, &flpops, &mflops );
488 if (numthreads_assemble == 0) {
492 targ.
list = &worklistAssemble;
497 std::vector<ThreadDohrmanAssemblyList_ThreadArgs_t<STATE> > args(numthreads_assemble);
500 for(
unsigned itr=0; itr<numthreads_assemble; itr++)
504 targ->
list = &worklistAssemble;
510 for(
unsigned itr=0; itr<numthreads_assemble; itr++)
519 PAPI_flops ( <ime, &ptime, &flpops, &mflops );
521 printf(
"Assemble Time: %.2f \t", ltime-rtime);
522 printf(
"Assemble Stiffness : %.2f seconds\n", stiff_sum);
526 if (logger->isDebugEnabled())
529 for (it=sublist.begin(); it!=sublist.end(); it++) {
530 std::stringstream sout;
531 sout <<
"Substructure number " << isub <<std::endl;
534 (*it)->fMatRed->Print(
"Matred",sout);
544 itwork = worklist.
fList.begin();
545 while (itwork != worklist.
fList.end()) {
548 worklistDecompose.Append(pt1);
551 worklistDecompose.Append(pt2);
556 if (numthreads_decompose == 0) {
560 targ.
list = &worklistDecompose;
565 std::vector<ThreadDohrmanAssemblyList_ThreadArgs_t<STATE> >
566 args(numthreads_decompose);
567 for(
unsigned itr=0; itr<numthreads_decompose; itr++)
571 targ.
list = &worklistDecompose;
574 &targ, __FUNCTION__);
576 for(
unsigned itr=0; itr<numthreads_decompose; itr++)
585 for (it=sublist.begin(), isub=0; it != sublist.end(); it++,isub++) {
587 (*it)->ContributeRhs(rhsloc);
609 const std::list<TPZAutoPointer<TPZDohrSubstructCondense<STATE> > > &sublist = precond->
Global();
612 std::list<TPZAutoPointer<TPZDohrSubstructCondense<STATE> > >::const_iterator it = sublist.begin();
616 for (isub=0; isub<
nsub ; isub++) {
624 (*it)->fLocalLoad.Zero();
625 fullstr.
Assemble((*it)->fLocalLoad,guiInterface);
628 for (it=sublist.begin(), isub=0; it != sublist.end(); it++,isub++) {
633 (*it)->ContributeRhs(rhsloc);
647 std::set<int> subelindexes;
650 for (iel=0; iel<nelem ; iel++) {
653 subelindexes.insert(iel);
657 std::set<int64_t> cornerconnind;
658 std::set<int> cornerconnseq;
660 std::set<int64_t>::iterator it;
661 for (it=cornerconnind.begin(); it!=cornerconnind.end(); it++) {
664 cornerconnseq.insert(seqnum);
671 int nel = elementgraphindex.
NElements()-1;
673 expelementgraphindex.
Push(0);
678 for (iel=0; iel<nel; iel++) {
679 int nc = elementgraphindex[iel+1]-elementgraphindex[iel];
681 int index = elementgraphindex[iel];
683 for (ic=0; ic<nc; ic++) {
684 expelementgraph.
Push(0);
685 expelementgraph[count++] = elementgraph[index++];
687 expelementgraph.
Push(0);
688 expelementgraph[count++] = nindep;
691 expelementgraphindex.
Push(count);
704 for (iext=0; iext<next; iext++) {
708 externalconnect[seqnum] = 1;
711 nel = expelementgraphindex.
NElements()-1;
712 for (iel=0; iel<nel; iel++) {
714 int firstnode = expelementgraphindex[iel];
715 int lastnode = expelementgraphindex[iel+1];
717 for (nodeindex= firstnode; nodeindex < lastnode; nodeindex++) {
718 int node = expelementgraph[nodeindex];
719 if (externalconnect[node] ==1) {
725 for (nodeindex= firstnode; nodeindex < lastnode; nodeindex++) {
726 int node = expelementgraph[nodeindex];
727 if (externalconnect[node] ==1) {
728 expelementgraph.
Push(node);
730 expelementgraph.
Push(nindep++);
745 for (iext=0; iext<next; iext++) {
749 expelementgraph.
Push(0);
750 expelementgraph[count++] = seqnum;
753 expelementgraphindex.
Push(count);
755 nel = expelementgraphindex.
NElements()-1;
761 std::stringstream sout;
762 renum.
Print(expelementgraph, expelementgraphindex,
"Expanded graph",sout);
763 if (logger->isDebugEnabled())
770 std::set<int> othercornereqs;
771 renum.
CornerEqs(3,nelprev,cornerconnseq,othercornereqs);
773 if (logger->isDebugEnabled())
775 std::stringstream str;
779 for (iel=0; iel<nelem; iel++) {
784 str <<
"SubCMesh " << sub << std::endl;
787 for (ic=0; ic<nc; ic++) {
790 if (othercornereqs.find(seqnum) != othercornereqs.end()) {
791 str << seqnum <<
" ";
801 std::set<int> cornerseqnums;
805 for (in=0; in<nnodes; in++) {
806 if (othercornereqs.find(in) != othercornereqs.end()) {
808 cornerseqnums.insert(in);
813 for(ieq=0; ieq<size; ieq++)
821 std::cout <<
"Number cornereqs " <<
fCornerEqs.size() << std::endl;
823 cornerseqnums = othercornereqs;
824 std::set<int> connectindices;
828 for (ic=0; ic<ncon; ic++) {
829 if (cornerseqnums.find(
fMesh->
ConnectVec()[ic].SequenceNumber()) != cornerseqnums.end()) {
830 connectindices.insert(ic);
835 for (el=0; el<numcel; el++) {
839 if(!submesh)
continue;
842 for (elsub=0; elsub<nelsub; elsub++) {
849 for (ic=0; ic<nc ; ic++) {
852 if(fatherindex != -1)
854 if (connectindices.find(fatherindex) != connectindices.end())
862 geonodeindices.
Push(nodeindex);
864 connectindices.erase(fatherindex);
875 for (igeo=0; igeo<ngeo; igeo++) {
876 nodeindices[0] = geonodeindices[igeo];
881 std::ofstream arquivo(
"PointMesh.vtk");
886 if (logger->isDebugEnabled())
888 std::stringstream str;
889 str <<
"number of corner equations " <<
fCornerEqs.size() << std::endl;
891 str <<
" corner equations ";
892 std::set<int>::iterator it;
905 str <<
"\nnumber of corner block indices after " << othercornereqs.size() << std::endl;
906 for(it=othercornereqs.begin(); it!=othercornereqs.end(); it++)
928 std::stringstream sout;
929 sout <<
"total submesh connects/glob/loc ";
931 for(ic=0; ic<ncon; ic++)
935 if(glob == -1)
continue;
936 int64_t locseq = sub->
ConnectVec()[ic].SequenceNumber();
937 int64_t globseq = super->
ConnectVec()[glob].SequenceNumber();
943 for(ieq =0; ieq<locsize; ieq++)
946 sout << ic <<
"/" << globpos+ieq <<
"/" << locpos+ieq <<
" ";
948 global[locpos+ieq] = globpos+ieq;
949 globinv[globpos+ieq] = locpos+ieq;
953 if (logger->isDebugEnabled())
965 int64_t iel, count = 0;
966 for(iel=0; iel<nel; iel++)
981 int64_t iel, count = 0;
982 for(iel=0; iel<nel; iel++)
987 if(sub && isub == count)
return sub;
1003 int64_t nblocks = origblock.
NBlocks();
1006 std::cout << __PRETTY_FUNCTION__ <<
" something seriously wrong!!!\n";
1009 for(ib=0; ib<nblocks; ib++)
1011 destblock.
Set(scatterpermuteblock[ib],origblock.
Size(ib));
1016 scatterpermute.
Resize(neq);
1017 gatherpermute.
Resize(neq);
1018 scatterpermute.
Fill(-1);
1019 gatherpermute.
Fill(-1);
1022 std::stringstream sout;
1023 sout <<
"internal submesh connects/glob/loc ";
1026 for(ic=0; ic<ncon; ic++)
1031 int64_t locseq = sub->
ConnectVec()[ic].SequenceNumber();
1033 if(locseq < 0)
continue;
1034 int destseq = scatterpermuteblock[locseq];
1035 int64_t locpos = origblock.
Position(locseq);
1036 int64_t destpos = destblock.
Position(destseq);
1037 int size = origblock.
Size(locseq);
1040 for(ieq =0; ieq<size; ieq++)
1043 sout << ic <<
"/" << locpos+ieq <<
"/" << destpos+ieq <<
" ";
1045 scatterpermute[locpos+ieq] = destpos+ieq;
1049 for(ieq = 0; ieq < neq; ieq++)
1051 gatherpermute[scatterpermute[ieq]] = ieq;
1054 if (logger->isDebugEnabled())
1067 if (logger->isDebugEnabled())
1069 std::stringstream sout;
1070 sout <<
"Input data for IdentifySubCornerEqs \nglobaltolocal";
1071 std::map<int,int>::iterator mapit;
1072 for(mapit = globaltolocal.begin(); mapit != globaltolocal.end(); mapit++)
1074 sout <<
" [" << mapit->first <<
" , " << mapit->second <<
"] ";
1076 sout <<
"\nCorner equations stored in the GenSubStructure data ";
1077 std::set<int>::iterator setit;
1080 sout << *setit <<
" , ";
1082 sout <<
"\ncornereqs " << cornereqs;
1090 std::set<int>::iterator it;
1092 int64_t localcount = 0;
1095 if(globaltolocal.find(*it) != globaltolocal.end())
1097 cornereqs[localcount] = globaltolocal[*it];
1098 coarseindex[localcount] = count;
1102 cornereqs.
Resize(localcount);
1103 coarseindex.
Resize(localcount);
1128 for (cel=0; cel<nel; cel++) {
1130 if(!compel)
continue;
1135 domaincolor[gel->
Index()] = domain_index[cel];
1137 std::ofstream vtkfile(
"partitionbefore.vtk");
1144 while (nsubnew != nsub)
1153 if (logger->isDebugEnabled())
1155 std::stringstream sout;
1156 sout <<
"Geometric mesh and domain indices\n";
1158 sout <<
"Domain indices : \n";
1160 for (int64_t el=0; el<nel; el++) {
1161 sout <<
"el " << el <<
" domain " << domain_index[el] << std::endl;
1174 for (cel=0; cel<nel; cel++) {
1176 if(!compel)
continue;
1181 domaincolor[gel->
Index()] = domain_index[cel];
1183 std::ofstream vtkfile(
"partition.vtk");
1189 for (isub=0; isub<
nsub; isub++) {
1192 std::cout <<
'^'; std::cout.flush();
1196 domain_index[index] = -1;
1200 for (iel=0; iel<nel; iel++) {
1201 int domindex = domain_index[iel];
1202 if (domindex >= 0) {
1207 submeshes[domindex]->TransferElement(
fMesh,iel);
1210 for (isub = 0; isub<
nsub; isub++) {
1211 int64_t nel = submeshes[isub]->
NElements();
1213 delete submeshes[isub];
1214 submeshes[isub] = 0;
1218 for (isub=0; isub<
nsub; isub++) {
1219 if (submeshes[isub])
1221 submeshes[isub]->MakeAllInternal();
1222 submeshes[isub]->PermuteExternalConnects();
1224 std::cout <<
'*'; std::cout.flush();
1235 pthread_mutex_t* TestThread)
1247 typedef std::pair<ENumbering,ENumbering> pairnumbering;
1249 TPZVec<int> &permutescatter = substruct->fPermutationsScatter[fromsub];
1262 matredbig->
SetK00(Stiffness);
1263 substruct->fMatRedComplete = matredbig;
1272 if (logger->isDebugEnabled())
1274 std::stringstream sout;
1275 sout <<
"SubMesh Index = " << submesh->
Index() <<
" Before permutation sequence numbers ";
1278 for (i=0; i<ncon; i++) {
1279 sout << i <<
'|' << submesh->
ConnectVec()[i].SequenceNumber() <<
" ";
1290 if (logger->isDebugEnabled())
1292 std::stringstream sout;
1293 sout <<
"SubMesh Index = " << submesh->
Index() <<
" After permutation sequence numbers ";
1296 for (i=0; i<ncon; i++) {
1297 sout << i <<
'|' << submesh->
ConnectVec()[i].SequenceNumber() <<
" ";
1303 if (logger->isDebugEnabled())
1305 std::stringstream sout;
1306 sout <<
"SubMesh Index = " << submesh->
Index() <<
"\nComputed scatter vector ";
1307 sout << permuteconnectscatter;
1308 sout <<
"\nStored scatter vector " << permutescatter;
1326 for (iel=0; iel < permuteconnectscatter.
NElements(); iel++) {
1327 invpermuteconnectscatter[permuteconnectscatter[iel]] = iel;
1333 filename <<
"SubMatrixInternal" << submesh->
Index() <<
".vtk";
1340 submesh->
Permute(invpermuteconnectscatter);
1347 substruct->fLocalLoad.Redim(Stiffness->Rows(),1);
1349 float rtime, ptime, mflops, ltime;
1352 PAPI_flops ( &rtime, &ptime, &flpops, &mflops );
1354 pairstructmatrix.Assemble(Stiffness.operator->(), matredptr, substruct->fLocalLoad);
1356 PAPI_flops ( <ime, &ptime, &flpops, &mflops );
1359 stiff_sum += ltime-rtime;
1365 substruct->fWeights.Resize(Stiffness->Rows());
1367 for(i=0; i<substruct->fWeights.NElements(); i++)
1369 substruct->fWeights[i] = Stiffness->GetVal(i,i);
1372 int64_t ncoarse = substruct->fCoarseNodes.NElements(), ic;
1373 int64_t neq = Stiffness->Rows();
1374 for(ic=0; ic<ncoarse; ic++)
1376 int coarse = substruct->fCoarseNodes[ic];
1377 Stiffness->operator()(coarse,coarse) += 10.;
1380 matredbig->operator()(neq+ic,coarse) = 1.;
1381 matredbig->operator()(coarse,neq+ic) = 1.;
1385 InvertedStiffness->
SetMatrix(Stiffness);
1390 #ifdef USE_LDLT_DECOMPOSITION 1395 matredbig->
SetSolver(InvertedStiffness);
1399 InvertedInternalStiffness->
SetMatrix(InternalStiffness);
1400 #ifdef DUMP_LDLT_MATRICES 1405 matredptr->
SetSolver(InvertedInternalStiffness);
1408 substruct->fMatRed = matredfull;
1414 #ifdef DUMP_LDLT_MATRICES 1416 #include "pzbfilestream.h" 1417 pthread_mutex_t dump_matrix_mutex = PTHREAD_MUTEX_INITIALIZER;
1418 unsigned matrix_unique_id = 0;
1423 std::cout <<
"Dump stiffness matrix at DecomposeBig..." << std::endl;
1424 std::stringstream fname;
1425 fname <<
"matrix_" << matrix_unique_id++ <<
".bin";
1428 Stiffness->Write(fs, 0);
1429 std::cout <<
"Dump stiffness matrix at DecomposeBig... [Done]" << std::endl;
1438 #ifdef USING_LIBNUMA 1444 max_node_id = numa_max_node();
1449 unsigned get_num_nodes() {
return (max_node_id+1);}
1451 unsigned get_rr_node_id() {
return (next_rr_node++);}
1455 unsigned max_node_id;
1457 unsigned next_rr_node;
1463 clarg::argBool naa(
"-naDALora",
"NUMA aware Dohrman Assembly List thread work objects re-allocation.",
false);
1464 clarg::argInt naat(
"-naDALorat",
"NUMA aware Dohrman Assembly List thread work objects re-allocation threshold.", 0);
1466 #ifdef USING_LIBNUMA 1467 clarg::argBool nats(
"-naDALtws",
"NUMA aware (node round-robin) Dohrman Assembly List thread work scheduling.",
false);
1475 if (Stiffness->MemoryFootprint() > naat.
get_value()) {
1479 #ifdef USE_LDLT_DECOMPOSITION 1480 Stiffness->Decompose_LDLt();
1482 Stiffness->Decompose_Cholesky();
1485 substruct->Initialize();
1493 if (InternalStiffness->MemoryFootprint() > naat.
get_value()) {
1497 #ifdef USE_LDLT_DECOMPOSITION 1498 InternalStiffness->Decompose_LDLt();
1500 InternalStiffness->Decompose_Cholesky();
1505 template<
class TVar>
1511 case EComputeMatrix:
1514 case EDecomposeInternal:
1525 if (fTask == EComputeMatrix)
1526 if (logger->isDebugEnabled())
1528 std::stringstream sout;
1531 sout <<
"Substructure for submesh " << fSubMeshIndex << std::endl;
1532 fSubstruct->Print(sout);
1539 template<class TVar>
1546 template<
class TVar>
1553 template<
class TVar>
1560 template<
class TVar>
1564 fList.push_back(
object);
1568 template<
class TVar>
1574 result = *
fList.begin();
1581 template<
class TVar>
1590 #ifdef USING_LIBNUMA 1592 struct bitmask* nodemask = numa_allocate_nodemask();
1593 numa_bitmask_clearall(nodemask);
1594 numa_bitmask_setbit(nodemask,args->
thread_idx%NUMA.get_num_nodes());
1595 numa_bind(nodemask);
1596 numa_free_nodemask(nodemask);
1599 node_id = args->
thread_idx%NUMA.get_num_nodes();
1610 runner->AssembleMatrices(args->
list->fTestThreads,node_id);
1611 runner = args->
list->NextObject();
1621 std::set<int64_t> connectindexes;
1623 int64_t nel = fMesh->NElements();
1624 for (iel=0; iel<nel; iel++) {
1626 TPZCompEl *cel = fMesh->ElementVec()[iel];
1650 for (is=0; is<ns; is++)
1662 for (ic=0; ic<ncomp; ic++) {
1673 for (isc=0; isc<nsconnect; isc++) {
1675 connectindexes.insert(ind);
1680 std::set<int64_t>::iterator it;
1681 fExternalConnectIndexes.Resize(connectindexes.size());
1683 for (it=connectindexes.begin(); it != connectindexes.end(); it++,i++) {
1684 fExternalConnectIndexes[i] = *it;
1692 std::map<int,int> domain_index_count;
1694 int64_t nel = fMesh->NElements();
1695 for (iel=0; iel<nel; iel++) {
1696 TPZCompEl *cel = fMesh->ElementVec()[iel];
1700 int mydomainindex = domain_index[cel->
Index()];
1701 domain_index_count[mydomainindex]++;
1703 std::set<int> domain_check;
1705 for (iel=0; iel<nel; iel++) {
1706 TPZCompEl *cel = fMesh->ElementVec()[iel];
1710 int mydomainindex = domain_index[cel->
Index()];
1711 if (domain_check.find(mydomainindex) != domain_check.end()) {
1714 domain_check.insert(mydomainindex);
1722 std::set<TPZCompEl *> gelcluster;
1726 if (gelcluster.find(gel->
Reference()) != gelcluster.end()) {
1729 int beforesize = gelcluster.size();
1731 int checksize = gelcluster.size();
1732 if (checksize == beforesize) {
1736 int nsides = gel->
NSides();
1738 for (is=0; is<nsides; is++) {
1740 if (sidedim != connectdimension) {
1748 for (neigh = 0; neigh <nneigh; neigh++) {
1751 if (domain_index[celloc->
Index()] != mydomainindex) {
1754 if (gelcluster.find(celloc) == gelcluster.end()) {
1761 if (gelcluster.size() != (std::set<TPZCompEl *>::size_type)domain_index_count[mydomainindex]) {
1762 if (gelcluster.size() > (std::set<TPZCompEl *>::size_type)domain_index_count[mydomainindex]) {
1765 domain_index_count[mydomainindex] -= gelcluster.size();
1766 domain_index_count[
nsub] = gelcluster.size();
1767 std::set<TPZCompEl *>::iterator it;
1768 domain_check.erase(mydomainindex);
1769 domain_check.insert(nsub);
1770 for (it=gelcluster.begin(); it!=gelcluster.end(); it++) {
1771 domain_index[(*it)->Index()]=
nsub;
1778 if (logger->isDebugEnabled())
1780 std::stringstream sout;
1781 sout <<
"Number of elements per domain ";
1782 std::map<int,int>::iterator it;
1784 for (it=domain_index_count.begin(); it != domain_index_count.end(); it++) {
1785 if (! (count++ %40)) {
1788 sout << it->first <<
" " << it->second <<
" " << std::endl;
1800 int meshdim = fMesh->Dimension();
1801 int64_t nel = fMesh->NElements();
1802 int64_t mincount = nel/nsub/20;
1806 std::map<int,int> domain_index_count;
1808 for (iel=0; iel<nel; iel++) {
1809 TPZCompEl *cel = fMesh->ElementVec()[iel];
1813 int mydomainindex = domain_index[cel->
Index()];
1823 domain_index_count[mydomainindex]++;
1828 int nsides = gel->
NSides();
1831 for (is=0; is<nsides; is++) {
1833 if (sidedim != connectdimension && geldim>=connectdimension) {
1836 if (geldim < connectdimension && is != nsides-1)
1845 int64_t nneighvalid = 0;
1846 for (neigh = 0; neigh <nneigh; neigh++) {
1854 int celdomain = domain_index[celloc->
Index()];
1855 if (celdomain != mydomainindex)
1857 domain_neighbours[mydomainindex].insert(celdomain);
1860 if (nneighvalid == 0)
1863 domain_neighbours[mydomainindex].insert(-1);
1868 if(logger->isDebugEnabled())
1870 std::stringstream sout;
1871 for (int64_t i=0; i<domain_neighbours.
size(); i++) {
1872 std::set<int>::const_iterator it;
1873 sout <<
"Domain index " << i <<
" neighbours ";
1874 for (it=domain_neighbours[i].begin(); it != domain_neighbours[i].
end(); it++) {
1879 std::map<int,int>::const_iterator it = domain_index_count.
begin();
1880 while (it != domain_index_count.end()) {
1881 sout <<
"Domain index " << it->first <<
" number of elements " << it->second << std::endl;
1892 for (isub=0; isub <
nsub; isub++)
1897 if (domain_neighbours[isub].size() == 1 )
1900 int target = *(domain_neighbours[isub].
begin());
1905 if (domain_dest[target] == -1 && domain_dest[isub] == -1)
1907 domain_dest[isub] = count;
1908 domain_dest[target] = count;
1911 else if (domain_dest[target] == -1)
1913 domain_dest[target] = domain_dest[isub];
1917 domain_dest[isub] = domain_dest[target];
1921 else if(domain_dest[isub] == -1 && domain_index_count[isub] < mincount)
1925 std::map<int,int> sizeDomain;
1926 std::set<int>::iterator it;
1927 for (it = domain_neighbours[isub].begin(); it != domain_neighbours[isub].
end(); it++) {
1929 sizeDomain[domain_index_count[isub]] = *it;
1932 int domaintargetindex = sizeDomain.rbegin()->second;
1933 int destdomainindexcount = domain_index_count[domaintargetindex];
1934 int domainshrinkcount = domain_index_count[isub];
1935 domain_index_count[domaintargetindex] = destdomainindexcount+domainshrinkcount;
1936 domain_index_count[isub] = 0;
1937 if(domain_dest[domaintargetindex] == -1)
1939 domain_dest[domaintargetindex] = count;
1940 domain_dest[isub] = count;
1944 domain_dest[isub] = domain_dest[domaintargetindex];
1948 else if (domain_dest[isub] == -1)
1950 domain_dest[isub] = count++;
1954 if (logger->isDebugEnabled())
1956 std::stringstream sout;
1958 for (isub=0; isub <
nsub; isub++) {
1959 sout <<
"isub = " << isub <<
" number of neighbours " << domain_neighbours[isub].
size() <<
" domains ";
1960 std::set<int>::iterator it;
1961 for (it = domain_neighbours[isub].begin(); it != domain_neighbours[isub].
end(); it++) {
1966 sout <<
"Destination domain " << domain_dest << std::endl;
1972 for (d=0; d<domsize; d++) {
1973 domain_index[d] = domain_dest[domain_index[d]];
1976 if (logger->isDebugEnabled())
1978 std::stringstream sout;
1979 sout <<
"Number of elements per domain ";
1980 std::map<int,int>::iterator it;
1982 for (it=domain_index_count.begin(); it != domain_index_count.end(); it++) {
1983 if (! (count++ %40)) {
1986 sout << it->first <<
" " << it->second <<
" ";
1998 int hasdohrassembly = 0;
1999 if (fDohrAssembly) {
2000 hasdohrassembly = 1;
2002 str.
Write(&hasdohrassembly);
2003 if (hasdohrassembly) {
2006 str.
Write( fExternalConnectIndexes);
2007 str.
Write(fCornerEqs);
2013 int hasdohrassembly;
2014 str.
Read(&hasdohrassembly);
2015 if (hasdohrassembly) {
2018 str.
Read( fExternalConnectIndexes);
2019 str.
Read( fCornerEqs);
2027 bool changed =
true;
2031 for (int64_t el=0; el<nel; el++) {
2040 int nsides = gel->
NSides();
2042 if (neighbour.
Element() != gel) {
2047 int64_t neighindex = neighcel->
Index();
2048 if (domainindex[el] != domainindex[neighindex]) {
2049 domainindex[el] = domainindex[neighindex];
void IdentifySubCornerEqs(std::map< int, int > &globaltolocal, TPZVec< int > &cornereqs, TPZVec< int > &coarseindex)
Identify the corner equations associated with a substructure.
int64_t NElements() const
Number of computational elements allocated.
Contains a class to record running statistics on CSV tables.
int fNumThreads
Number of threads in Assemble process.
TPZGeoMesh * Reference() const
Returns a pointer to the geometrical mesh associated.
#define PZ_PTHREAD_MUTEX_UNLOCK(mutex, fn)
void Assemble(int isub, const TPZFMatrix< TVar > &local, TPZFMatrix< TVar > &global) const
Sum the values in the local matrix into the global matrix.
REAL ft4identcorner
Total Time for Identifying Corner Nodes = Convert Graph + AnalyseGraph.
void ComputeInternalEquationPermutation(TPZSubCompMesh *sub, TPZVec< int > &scatterpermute, TPZVec< int > &gatherpermute)
Computes the permutation vectors from the subcompmesh ordening to the "internal first" ordering...
Contains the TPZTimeTemp class which takes times.
void CorrectNeighbourDomainIndex(TPZCompMesh *cmesh, TPZVec< int > &domainindex)
Set the domain index of the lower dimension elements equal to the domain index of their neighbour...
int Position(const int block_diagonal) const
Returns the position of first element block dependent on matrix diagonal.
void AssembleMatrices(pthread_mutex_t &testthread, int numa_node)
void SetElementGraph(TPZVec< int64_t > &elgraph, TPZVec< int64_t > &elgraphindex)
This method declares the element graph to the object.
Implements computational element and a side. Computational Element.
int Set(const int index, const int dim, const int pos=-1)
Modifies existing block dimensions or creates a new block with given index.
Represents a set of shape functions associated with a computational element/side. Computational Eleme...
Contains definitions to LOGPZ_DEBUG, LOGPZ_INFO, LOGPZ_WARN, LOGPZ_ERROR and LOGPZ_FATAL, and the implementation of the inline InitializePZLOG(string) function using log4cxx library or not. It must to be called out of "#ifdef LOG4CXX" scope.
clarg::argInt naat("-naDALorat", "NUMA aware Dohrman Assembly List thread work objects re-allocation threshold.", 0)
void VisualMatrix(TPZFMatrix< TVar > &matrix, const std::string &outfilename)
This function calls the function that create a Data Explorer file or VTK file that allow to visualiz...
TPZAutoPointer< TPZDohrAssembly< STATE > > fDohrAssembly
void SetReduced()
changes the declared dimension of the matrix to fDim1
TPZAutoPointer< TPZCompMesh > fMesh
virtual TPZGeoEl * CreateGeoElement(MElementType type, TPZVec< int64_t > &cornerindexes, int matid, int64_t &index, int reftype=1)
Generic method for creating a geometric element. Putting this method centrally facilitates the modifi...
int64_t NodeIndex(int64_t nolocal, TPZCompMesh *super)
Gives the id node of one local node in containing mesh.
TPZTimeTemp tempo
External variable to TPZTimeTemp (to take time)
Implements renumbering for elements of a mesh. Utility.
void SimetrizeMatRed()
If fK00 is simetric, only part of the matrix is accessible to external objects.
clarg::argInt nsub("-nsub", "number of substructs", 4)
int64_t NEquations()
This computes the number of equations associated with non-restrained nodes.
int64_t NIndependentConnects()
Number of independent connect objects.
TPZAutoPointer< TPZDohrAssembly< TVar > > fAssembly
static void DecomposeInternal(TPZAutoPointer< TPZDohrSubstructCondense< STATE > > substruct, int numa_node)
TPZGeoElSide Reference() const
Reference to the geometric element.
TPZAutoPointer< TPZCompMesh > fMesh
virtual void Resequence(TPZVec< int64_t > &perm, TPZVec< int64_t > &iperm)
static TPZSubCompMesh * SubMesh(TPZAutoPointer< TPZCompMesh > compmesh, int isub)
return a pointer to the isub submesh
Implements assembling by Dohrman algorithm.
std::vector< work_item_t< TVar > > work_items
Defines step solvers class. Solver.
Contains the TPZRenumbering class which defines the behavior to implementing node sequence numbering ...
Calculate the Times. Utility.
virtual TPZEquationFilter & EquationFilter()
access method for the equation filter
virtual int Resize(const int64_t newRows, const int64_t newCols)
Redimensions a matriz keeping the previous values.
virtual int NCornerNodes() const =0
Returns the number of corner nodes of the element.
Contains the TPZDohrMatrix class which implements a matrix divided into substructures. Also contains the TPZDohrThreadMultData and TPZDohrThreadMultList structs.
void Subdivide(int nParts, TPZVec< int > &Domains)
Subdivides a Graph in nParts.
int64_t NElements() const
Number of elements of the mesh.
TPZAutoPointer< TPZDohrSubstructCondense< TTVar > > fSubstruct
void IdentifyCornerNodes()
Identify cornernodes.
void SetNumCornerEqs(int nc)
Utility class which represents an element with its side. The Geometric approximation classes Geometry...
void AddSubstruct(TPZAutoPointer< TSubStruct > substruct)
It adds a substruct.
int64_t NElements() const
Access method to query the number of elements of the vector.
Implements a chunk vector with free store administration. Utility.
Contains TPZMatRed class which implements a simple substructuring of a linear system of equations...
void ComputeFillIn(int64_t resolution, TPZFMatrix< REAL > &fillin)
This method will fill the matrix passed as parameter with a representation of the fillin of the globa...
virtual int NSides() const =0
Returns the number of connectivities of the element.
Implements a matrix divided into substructures. Matrix Sub structure.
virtual int64_t NodeIndex(int i) const =0
Returns the index of the ith node the index is the location of the node in the nodevector of the mesh...
pthread_mutex_t fAccessElement
Mutexes (to choose which submesh is next)
virtual int SideDimension(int side) const =0
Return the dimension of side.
virtual void BuildCornerConnectList(std::set< int64_t > &connectindexes) const
adds the connect indexes associated with base shape functions to the set
virtual void Resize(const int64_t newsize, const T &object)
Resizes the vector object.
Contains the declaration of the VisualMatrix functions to VTK and DX packages.
Iterações do laço podem ser executadas em paralelo. .
TPZAutoPointer< TPZDohrAssembly< TVar > > fAssembly
std::set< int > fCornerEqs
The global equations defining the coarse matrix.
RunStatsTable dohr_dec("-tpz_dohr_dec", "Raw data table statistics for TPZDohrStructMatrix::Assemble decompose (second)")
void SetK00(TPZAutoPointer< TPZMatrix< TVar > > K00)
Sets the matrix pointer of the upper left matrix to K00.
void IdentifyEqNumbers(TPZSubCompMesh *sub, std::map< int, int > &global, std::map< int, int > &globinv)
void ComputeElGraph(TPZStack< int64_t > &elgraph, TPZVec< int64_t > &elgraphindex)
Computes the connectivity graph of the elements, as appropriate for the TPZRenumbering class...
Contains the TPZDohrStructMatrix class which implements structural matrix divided in sub structures...
void PermuteInternalFirst(TPZVec< int64_t > &permute)
Permutes the potentially internal connects to the first on the list Respect the previous order of th...
Contains the TPZFStructMatrix class which implements Full Structural Matrices.
std::list< TPZAutoPointer< TSubStruct > > & Global()
The matrix class is a placeholder for a list of substructures.
void SetSolver(TPZAutoPointer< TPZMatrixSolver< TVar > > solver)
Implements SkyLine Structural Matrices. Structural Matrix.
Refines geometrical mesh (all the elements) num times.
TPZCompMesh * fMesh
Pointer to the computational mesh from which the matrix will be generated.
#define PZ_PTHREAD_JOIN(thread, val, fn)
static TPZAutoPointer< TPZSavable > GetAutoPointer(const int64_t &objId)
Implements a matrix which computes the preconditioner developed by Dohrmann. Sub Structure.
clarg::argBool naa("-naDALora", "NUMA aware Dohrman Assembly List thread work objects re-allocation.", false)
int64_t size() const
Returns the number of elements of the vector.
virtual void Resize(const int64_t newsize, const T &object)
Resizes the vector object reallocating the necessary storage, copying the existing objects to the new...
Contains the TPZBoostGraph class which implements an interface to the BGL for graph operations...
void AssembleTBB(TPZMatrix< STATE > &mat, TPZFMatrix< STATE > &rhs, TPZAutoPointer< TPZGuiInterface > guiInterface)
void Write(TPZStream &str, int withclassid) const override
Writes this object to the TPZStream buffer. Include the classid if withclassid = true.
int64_t SequenceNumber() const
Returns the Sequence number of the connect object.
virtual TPZMatrix< STATE > * Create()
void Push(const T object)
Pushes a copy of the object on the stack.
parallel_assemble_task_t(TPZAutoPointer< TPZDohrAssembly< TVar > > assembly, TPZAutoPointer< TPZCompMesh > mesh)
#define PZ_PTHREAD_MUTEX_LOCK(mutex, fn)
virtual void Write(const bool val)
virtual void Print(std::ostream &out=std::cout) const
Print the information of the grid to an ostream.
~ThreadDohrmanAssemblyList()
Defines the behaviour of all geometric elements. GeometryTPZGeoEl is the common denominator for all g...
TPZAutoPointer< TPZMatrix< STATE > > fDohrPrecond
This abstract class which defines the behavior which derived classes need to implement for implement...
To condense matrix divided in sub structures. Sub Structure.
int64_t Index() const
Returns the index of the element within the element vector of the mesh.
Implements a group of computational elements as a mesh and an element. Computational Mesh...
#define DebugStop()
Returns a message to user put a breakpoint in.
void CornerEqs(unsigned int mincorners, int64_t nelconsider, std::set< int > &eligible, std::set< int > &cornernodes)
Analyzes the graph, finds the corner nodes Number of elements which should be considered for determi...
int ClusterIslands(TPZVec< int > &domain_index, int nsub, int connectdimension)
Eliminates subdomains who are embedded in other subdomains.
Implements a matrix whose nonzero elements are stored in binary tree. Matrix.
#define LOGPZ_DEBUG(A, B)
Define log for debug info.
static int64_t NSubMesh(TPZAutoPointer< TPZCompMesh > compmesh)
Return the number of submeshes.
Contains the TPZSkylineStructMatrix class which implements SkyLine Structural Matrices.
void Append(TPZAutoPointer< ThreadDohrmanAssembly< TVar > > object)
int Dimension() const
Returns the dimension of the simulation.
virtual void ComputeNodElCon()
Compute the number of elements connected to each connect object.
virtual void SetMatrix(TPZAutoPointer< TPZMatrix< TVar > > Refmat)
Sets a matrix to the current object.
virtual void Assemble(TPZMatrix< STATE > &mat, TPZFMatrix< STATE > &rhs, TPZAutoPointer< TPZGuiInterface > guiInterface, unsigned numthreads_assemble, unsigned numthreads_decompose) override
Assemble the global system of equations into the matrix which has already been created.
void parallel_for(int n, body_t &obj)
TPZAdmChunkVector< TPZGeoNode > & NodeVec()
Contains the TPZDohrPrecond class which implements a matrix which computes the preconditioner develop...
TPZCompEl * Reference() const
Return a pointer to the element referenced by the geometric element.
static void * ThreadWork(void *voidptr)
static void DecomposeBig(TPZAutoPointer< TPZDohrSubstructCondense< STATE > > substruct, int numa_node)
int HasDependency() const
Returns whether exist dependecy information.
virtual TPZMatrix< STATE > * Create() override
This will create a DohrMatrix.
Contains the TPZPairStructMatrix class.
const TPZBlock< STATE > & Block() const
Access the block structure of the solution vector.
#define PZ_PTHREAD_MUTEX_DESTROY(mutex, fn)
Contains the TPZDohrSubstructCondense class which condenses matrix divided in sub structures...
void IdentifyExternalConnectIndexes()
Identify the external connects.
double ReturnTimeDouble()
When called, returns the time since the creation of the object in a double.
void SetNumThreads(int numthreads)
TPZAdmChunkVector< TPZConnect > & ConnectVec()
Return a reference to the connect pointers vector.
T * begin() const
Casting operator. Returns The fStore pointer.
int64_t Index() const
Returns element index of the mesh fELementVec list.
virtual TPZGeoElSide Neighbour(int side)=0
Returns a pointer to the neighbour and the neighbourside along side of the current element...
Contains the TPZfTime class which calculates times.
Contains macros and functions to support performance analysis.
static void PrintGMeshVTK(TPZGeoMesh *gmesh, std::ofstream &file, bool matColor=true)
Default constructor for graphical mesh with VTK format.
int Redim(const int64_t newRows, const int64_t newCols) override
Redimension a matrix and ZERO your elements.
void Print(TPZVec< int64_t > &grapho, TPZVec< int64_t > &graphoindex, const char *name=0, std::ostream &out=std::cout)
Prints graph.
pthread_mutex_t fAccessElement
Mutexes (to choose which submesh is next)
virtual void Assemble(TPZMatrix< STATE > &mat, TPZFMatrix< STATE > &rhs, TPZAutoPointer< TPZGuiInterface > guiInterface) override
Assemble the global system of equations into the matrix which has already been created.
virtual TPZConnect & Connect(int i) const
Returns a pointer to the ith node.
void ConnectedCompElementList(TPZStack< TPZCompElSide > &elsidevec, int onlyinterpolated, int removeduplicates)
Returns all connected computational elements to the current element if onlyinterpolated == 1 only e...
virtual int64_t ConnectIndex(int i) const =0
Returns the index of the ith connectivity of the element.
int NBlocks() const
Returns number of blocks on diagonal.
virtual int NConnects() const =0
Returns the number of nodes of the element.
Implements Full Structural Matrices. Structural Matrix.
TPZGeoEl * Element() const
void ComputePermutationInternalFirst(TPZVec< int64_t > &permute) const
Computes the permutation vector which puts the internal connects to the first on the list Respect th...
TPZAutoPointer< ThreadDohrmanAssembly< TVar > > NextObject()
Returns an object and removes it from the list in a thread safe way.
void BuildConnectivity()
Build the connectivity of the grid.
static void AssembleMatrices(TPZSubCompMesh *submesh, TPZAutoPointer< TPZDohrSubstructCondense< STATE > > substruct, TPZAutoPointer< TPZDohrAssembly< STATE > > dohrassembly, pthread_mutex_t *TestThread)
Contains declaration of TPZSubCompMesh class which implements a group of computational elements as a ...
T Pop()
Retrieve an object from the stack.
virtual void CleanUpUnconnectedNodes()
Delete the nodes which have no elements connected to them.
virtual int Dimension() const =0
Returns the dimension of the element.
TPZCompEl * Element() const
Gives a pointer to the reference computational element.
Implements a simple substructuring of a linear system of equations, composed of 4 submatrices...
TPZAutoPointer< TPZMatrix< TVar > > K00()
TPZGeoEl * Reference() const
Return a pointer to the corresponding geometric element if such exists, return 0 otherwise.
const SubsList & SubStructures() const
Contains the TPZMatRedStructMatrix class.
virtual int NSideConnects(int iside) const override=0
Returns the number of dof nodes along side iside.
This class implements a geometric mesh for the pz environment. Geometry.
void push_work_item(unsigned submesh_idx, const TPZAutoPointer< TPZDohrSubstructCondense< TVar > > &substruct)
int Dimension() const
the dimension associated with the element/side
int64_t SideConnectIndex(int icon, int is) const
Returns the index of the c th connect object along side is.
TPZManVector< int > fExternalConnectIndexes
The connect indexes which are external.
virtual void ComputeNodElCon() override
Computes the number of elements connected to each connect object.
Implements computational mesh. Computational Mesh.
pthread_mutex_t fTestThreads
mutex to debug the assembly process
int Size(const int block_diagonal) const
Returns block dimension.
const T & get_value() const
TPZAutoPointer< TPZDohrSubstructCondense< TVar > > fSubstruct
TPZAdmChunkVector< TPZCompEl * > & ElementVec()
Returns a reference to the element pointers vector.
#define PZ_PTHREAD_CREATE(thread, attr, routine, args, fn)
REAL ft1comput
Time for Computing the system of equations for each substructure.
Contains declaration of TPZInterpolatedElement class which implements computational element of the in...
int64_t NumInternalEquations()
Computes the number of internal equations.
void Fill(const T ©, const int64_t from=0, const int64_t numelem=-1)
Will fill the elements of the vector with a copy object.
virtual ~TPZDohrStructMatrix()
Contains TPZVerySparseMatrix class which implements a matrix whose nonzero elements are stored in bin...
ThreadDohrmanAssemblyList_ThreadArgs_t()
void OpenWrite(const std::string &fileName)
Defines the interface for saving and reading data. Persistency.
int64_t NElements() const
Returns the number of elements of the vector.
void Initialize()
Initialize the necessary datastructures.
int SeparateUnconnected(TPZVec< int > &domain_index, int nsub, int connectdimension)
Verifies if the subdomains are connected by sides of connectdimension and separate them if not...
Contains the TPZSloan class.
work_item_t(unsigned submesh_idx, const TPZAutoPointer< TPZDohrSubstructCondense< TTVar > > &substruct)
void SetDirect(const DecomposeType decomp)
void InitializeBlock()
Resequence the block object, remove unconnected connect objects and reset the dimension of the soluti...
#define PZ_PTHREAD_MUTEX_INIT(mutex, attr, fn)
ThreadDohrmanAssemblyList< T > * list
virtual TPZMatrix< STATE > * CreateAssemble(TPZFMatrix< STATE > &rhs, TPZAutoPointer< TPZGuiInterface > guiInterface, unsigned numthreads_assemble, unsigned numthreads_decompose) override
This will create a DohrMatrix and compute its matrices.
std::list< TPZAutoPointer< ThreadDohrmanAssembly< TVar > > > fList
void Initialize()
Initialize the necessary datastructures.
void ReallocForNuma(int node)
Contains the TPZNodesetCompute class which computes the cardinality of a nodegraph.
Defines the interface of a computational element. Computational Element.
Contains the TPZVTKGeoMesh class which implements the graphical mesh to VTK environment to geometric ...
bool IsCondensed() const
Access method to return the indication whether the connect is condensed or not.
Interface to sloan subrotines. Utility.
RunStatsTable dohr_ass("-tpz_dohr_ass", "Raw data table statistics for TPZDohrStructMatrix::Assemble assemble (first)")
Assemble the global system of equations into the matrix which has already been created.
T * end() const
Returns a pointer to the last+1 element.
void Read(TPZStream &str, void *context) override
read objects from the stream
static void WritePointer(const TPZSavable *obj, TPZStream *stream)
TPZAutoPointer< TPZCompMesh > fCompMesh
Autopointer control of the computational mesh.
Contains TPZMetis class which implements the renumbering for elements of a mesh to minimize the band...
Implements computational element based on an interpolation space. Computational Element.
void SubStructure(int nsub)
Partition the mesh in submeshes.
void Reset()
Reset method.
int Resequence(const int start=0)
Resequences blocks positioning.
virtual void Read(bool &val)
Implements structural matrix divided in sub structures. Structural Matrix Sub structure.
ThreadDohrmanAssemblyList()
void Permute(TPZVec< int64_t > &permute)
Permute the sequence number of the connect objects It is a permute gather operation.