diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c6df298..7aa59d2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,6 +26,7 @@ list( "${dir}/modsparse_crs.f90" "${dir}/modsparse.f90" "${dir}/modsparse_gen.f90" + "${dir}/modsparse_inv.f90" "${dir}/modsparse_ll.f90" "${dir}/modsparse_metisgraph.f90" "${dir}/modsparse_mkl.f90" diff --git a/src/Makefile b/src/Makefile index c7af528..6865b08 100644 --- a/src/Makefile +++ b/src/Makefile @@ -9,7 +9,7 @@ ifeq ($(METISENABLE),1) endif ifeq ($(SPAINVENABLE),1) - OBJSPAINV = sgtrsm.o dgtrsm.o smbfct.o modspainv.o + OBJSPAINV = sgtrsm.o dgtrsm.o modspainv.o endif @@ -17,6 +17,7 @@ all: libsparse.a OBJ = modcommon.o \ modrandom.o \ + modsparse_inv.o smbfct.o\ $(OBJPARDISO) $(OBJMETIS) $(OBJSPAINV) \ modsparse_mkl.o modhash.o modsparse.o modsparse_gen.o modsparse_coo.o \ modsparse_crs64.o \ @@ -36,6 +37,11 @@ libsparse.a: $(OBJ) modsparse_coo.o: modsparse.o modhash.o modsparse_crs.o: modsparse_mkl.o modsparse.o $(OBJSPAINV) $(OBJPARDISO) $(OBJMETIS) modsparse_crs64.o: modsparse.o $(OBJPARDISO) +ifeq ($(SPAINVENABLE),1) +modsparse_inv.o: modspainv.o smbfct.o +else +modsparse_inv.o: smbfct.o +endif modspainv.o: modcommon.o modsparse_mkl.o modsparse.o: $(OBJPARDISO) modsparse_gen.o: modsparse.o diff --git a/src/gsfct.f b/src/gsfct.f new file mode 100644 index 0000000..324e3df --- /dev/null +++ b/src/gsfct.f @@ -0,0 +1,144 @@ +c####################################################################### +c# this is subroutine "GSFCT" as given in : 'Computer Solution # +c# of Sparse Linear Systems' by A. George, J. Liu and # +c# E. Ng, 1994, pp. 180-183; # +c####################################################################### + +C----- SUBROUTINE GSFCT +C*************************************************************** +C*************************************************************** +C****** GSFCT ..... GENERAL SPARSE SYMMETRIC FACT ****** +C*************************************************************** +C*************************************************************** +C +C PURPOSE - THIS SUBROUTINE PERFORMS THE SYMMETRIC +C FACTORIZATION FOR A GENERAL SPARSE SYSTEM, STORED IN +C THE COMPRESSED SUBSCRIPT DATA FORMAT. +C +C INPUT PARAMETERS - +C NEQNS - NUMBER OF EQUATIONS. +C XLNZ - INDEX VECTOR FOR LNZ. XLNZ(I) POINTS TO THE +C START OF NONZEROS IN COLUMN I OF FACTOR L. +C (XNZSUB, NZSUB) - THE COMPRESSED SUBSCRIPT DATA +C STRUCTURE FOR FACTOR L. +C +C UPDATED PARAMETERS - +C LNZ - ON INPUT, CONTAINS NONZEROS OF A, AND ON +C RETURN, THE NONZEROS OF L. +C DIAG - THE DIAGONAL OF L OVERWRITES THAT OF A. +C IFLAG - THE ERROR FLAG. IT IS SET TO 1 IF A ZERO OR +C NEGATIVE SQUARE ROOT OCCURS DURING THE +C FACTORIZATION. +C OPS - A DOUBLE PRECISION COMMON PARAMETER THAT IS +C INCREMENTED BY THE NUMBER OF OPERATIONS +C PERFORMED BY THE SUBROUTINE. +C +C WORKING PARAMETERS - +C LINK - AT STEP J, THE LIST IN +C LINK(J), LINK(LINK(J)), ........... +C CONSISTS OF THOSE COLUMNS THAT WILL MODIFY +C THE COLUMN L(*,J). +C FIRST - TEMPORARY VECTOR TO POINT TO THE FIRST +C NONZERO IN EACH COLUMN THAT WILL BE USED +C NEXT FOR MODIFICATION. +C TEMP - A TEMPORARY VECTOR TO ACCUMULATE MODIFICATIONS. +C +C*************************************************************** +C + SUBROUTINE GSFCT ( NEQNS, XLNZ, LNZ, XNZSUB, NZSUB, DIAG, + 1 LINK, FIRST, TEMP, IFLAG ) +C +C*************************************************************** +C + DOUBLE PRECISION COUNT, OPS + COMMON /SPKOPS/ OPS + REAL DIAG(1), LNZ(1), TEMP(1), DIAGJ, LJK + INTEGER LINK(1), NZSUB(1) + INTEGER FIRST(1), XLNZ(1), XNZSUB(1), + 1 I, IFLAG, II, ISTOP, ISTRT, ISUB, J, + 1 K, KFIRST, NEQNS, NEWK +C +C*************************************************************** +C +C ------------------------------ +C INITIALIZE WORKING VECTORS ... +C ------------------------------ + DO 100 I = 1, NEQNS + LINK(I) = 0 + TEMP(I) = 0.0E0 + 100 CONTINUE +C -------------------------------------------- +C COMPUTE COLUMN L(*,J) FOR J = 1,...., NEQNS. +C -------------------------------------------- + DO 600 J = 1, NEQNS +C ------------------------------------------- +C FOR EACH COLUMN L(*,K) THAT AFFECTS L(*,J). +C ------------------------------------------- + DIAGJ = 0.0E0 + NEWK = LINK(J) + 200 K = NEWK + IF ( K .EQ. 0 ) GO TO 400 + NEWK = LINK(K) +C --------------------------------------- +C OUTER PRODUCT MODIFICATION OF L(*,J) BY +C L(*,K) STARTING AT FIRST(K) OF L(*,K). +C --------------------------------------- + KFIRST = FIRST(K) + LJK = LNZ(KFIRST) + DIAGJ = DIAGJ + LJK*LJK + OPS = OPS + 1.0D0 + ISTRT = KFIRST + 1 + ISTOP = XLNZ(K+1) - 1 + IF ( ISTOP .LT. ISTRT ) GO TO 200 +C ------------------------------------------ +C BEFORE MODIFICATION, UPDATE VECTORS FIRST, +C AND LINK FOR FUTURE MODIFICATION STEPS. +C ------------------------------------------ + FIRST(K) = ISTRT + I = XNZSUB(K) + (KFIRST-XLNZ(K)) + 1 + ISUB = NZSUB(I) + LINK(K) = LINK(ISUB) + LINK(ISUB) = K +C --------------------------------------- +C THE ACTUAL MOD IS SAVED IN VECTOR TEMP. +C --------------------------------------- + DO 300 II = ISTRT, ISTOP + ISUB = NZSUB(I) + TEMP(ISUB) = TEMP(ISUB) + LNZ(II)*LJK + I = I + 1 + 300 CONTINUE + COUNT = ISTOP - ISTRT + 1 + OPS = OPS + COUNT + GO TO 200 +C ---------------------------------------------- +C APPLY THE MODIFICATIONS ACCUMULATED IN TEMP TO +C COLUMN L(*,J). +C ---------------------------------------------- + 400 DIAGJ = DIAG(J) - DIAGJ + IF ( DIAGJ .LE. 0.0E0 ) GO TO 700 + DIAGJ = SQRT(DIAGJ) + DIAG(J) = DIAGJ + ISTRT = XLNZ(J) + ISTOP = XLNZ(J+1) - 1 + IF ( ISTOP .LT. ISTRT ) GO TO 600 + FIRST(J) = ISTRT + I = XNZSUB(J) + ISUB = NZSUB(I) + LINK(J) = LINK(ISUB) + LINK(ISUB) = J + DO 500 II = ISTRT, ISTOP + ISUB = NZSUB(I) + LNZ(II) = ( LNZ(II)-TEMP(ISUB) ) / DIAGJ + TEMP(ISUB) = 0.0E0 + I = I + 1 + 500 CONTINUE + COUNT = ISTOP - ISTRT + 1 + OPS = OPS + COUNT + 600 CONTINUE + RETURN +C ------------------------------------------------------ +C ERROR - ZERO OR NEGATIVE SQUARE ROOT IN FACTORIZATION. +C ------------------------------------------------------ + 700 IFLAG = 1 + RETURN + END diff --git a/src/modspainv.f90 b/src/modspainv.f90 index 56db986..3dc7a4c 100644 --- a/src/modspainv.f90 +++ b/src/modspainv.f90 @@ -1,9 +1,7 @@ !> Module containing subroutines for sparse inverses of symmetric positive definite matrices -!> @todo Still raw and not very efficient - !Based on Karin Meyer 's code (didgeridoo.une.edu.au/womwiki/doku.php?id=fortran:fortran). -!The content of Karin Meyer 's wiki is licensed under CC Attribution-Sahre Alike 4.0 International. +!The content of Karin Meyer 's wiki is licensed under CC Attribution-Share Alike 4.0 International. ! !Support of sparse inverse of SPSD matrices by implementing the S. D. Kachman modifications !(https://www.ars.usda.gov/ARSUserFiles/80420530/MTDFREML/MTDFMan.pdf ; Chapter 6) @@ -12,33 +10,19 @@ module modspainv #if (_DP==0) - use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real32 + use iso_fortran_env,only:output_unit,int32,real32,real64,wp=>real32 use modsparse_mkl, only: spotrf, spotri, strsm, ssymm, sgemm #else - use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real64 + use iso_fortran_env,only:output_unit,int32,real32,real64,wp=>real64 use modsparse_mkl, only: dpotrf, dpotri, dtrsm, dsymm, dgemm #endif use modcommon, only: progress - !$ use omp_lib - implicit none + implicit none(type, external) private - public::get_chol,get_ichol,get_spainv + public :: super_nodes, super_gsfct, super_sparsinv - integer(kind=int32),parameter::minsizesupernode=0 !values other than 0 (e.g., 256) may give troubles real(kind=wp),parameter::tol=1.e-8_wp - interface get_chol - module procedure get_chol_crs - end interface - - interface get_ichol - module procedure get_ichol_crs - end interface - - interface get_spainv - module procedure get_spainv_crs - end interface - interface SUBROUTINE dgtrsm(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) !.. Scalar Arguments .. @@ -50,260 +34,8 @@ SUBROUTINE dgtrsm(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) end subroutine end interface - interface - subroutine smbfct(neqns,xadj,adjncy,perm,invp,& - xlnz,maxlnz,xnzsub,nzsub,maxsub,& - rchlnk,mrglnk,marker,flag& - ) - integer::neqns - integer::maxsub,flag,maxlnz - integer::xadj(*),adjncy(*) - integer::perm(*),invp(*) - integer::xlnz(*),xnzsub(*),nzsub(*),rchlnk(*),mrglnk(*),marker(*) - end subroutine - end interface - contains -!PUBLIC -!Should return the Cholesky factor in the permuted order -subroutine get_ichol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un) - integer(kind=int32),intent(inout)::ia(:) - integer(kind=int32),intent(inout)::ja(:) - integer(kind=int32),intent(in)::xadj(:),adjncy(:) - integer(kind=int32),intent(in)::perm(:) !Ap(i,:)=A(perm(i),:) - integer(kind=int32),intent(in),optional::minsizenode - integer(kind=int32),intent(in),optional::un - real(kind=wp),intent(inout)::a(:) - - integer(kind=int32)::unlog,neqns - integer(kind=int32)::mssn - integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:) - real(kind=wp),allocatable::xspars(:),diag(:) - !$ real(kind=real64)::t1 - real(kind=real64)::time(6) - - mssn=minsizesupernode - if(present(minsizenode))mssn=minsizenode - - unlog=output_unit - if(present(un))unlog=un - - time=0._real64 - - neqns=size(ia)-1 - - call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) - - ! Cholesky factorization - !Convert to ija - !$ t1=omp_get_wtime() - call converttoija_noperm(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm) - !$ time(6)=omp_get_wtime()-t1 - - call writetime(unlog,time,'SPARSE CHOL FACT.') - -end subroutine - -subroutine get_chol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un) - integer(kind=int32),intent(inout),allocatable::ia(:) - integer(kind=int32),intent(inout),allocatable::ja(:) - integer(kind=int32),intent(in)::xadj(:),adjncy(:) - integer(kind=int32),intent(in)::perm(:) !Ap(i,:)=A(perm(i),:) - integer(kind=int32),intent(in),optional::minsizenode - integer(kind=int32),intent(in),optional::un - real(kind=wp),intent(inout),allocatable::a(:) - - integer(kind=int32)::unlog,neqns - integer(kind=int32)::mssn - integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:) - real(kind=wp),allocatable::xspars(:),diag(:) - !$ real(kind=real64)::t1 - real(kind=real64)::time(6) - - mssn=minsizesupernode - if(present(minsizenode))mssn=minsizenode - - unlog=output_unit - if(present(un))unlog=un - - time=0._real64 - - neqns=size(ia)-1 - - call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) - - !Convert to ija - !$ t1=omp_get_wtime() - call convertfactortoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a) - !$ time(6)=omp_get_wtime()-t1 - - call writetime(unlog,time,'CHOL FACT.') - -end subroutine - -subroutine get_spainv_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un) - integer(kind=int32),intent(in)::ia(:) - integer(kind=int32),intent(in)::ja(:) - integer(kind=int32),intent(in)::xadj(:),adjncy(:) - integer(kind=int32),intent(inout)::perm(:) !Ap(i,:)=A(perm(i),:) - integer(kind=int32),intent(in),optional::minsizenode - integer(kind=int32),intent(in),optional::un - real(kind=wp),intent(inout)::a(:) - - integer(kind=int32)::unlog,neqns - integer(kind=int32)::mssn - integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:) - real(kind=wp),allocatable::xspars(:),diag(:) - !$ real(kind=real64)::t1 - real(kind=real64)::time(6) - - mssn=minsizesupernode - if(present(minsizenode))mssn=minsizenode - - unlog=output_unit - if(present(un))unlog=un - - time=0._real64 - - neqns=size(ia)-1 - - call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.true.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) - - !Convert to ija - !$ t1=omp_get_wtime() - call converttoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm) - !$ time(6)=omp_get_wtime()-t1 - - call writetime(unlog,time,'INVERSION') - -end subroutine - -subroutine get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,lspainv,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) - integer(kind=int32),intent(in)::neqns - integer(kind=int32),intent(inout)::mssn - integer(kind=int32),intent(in)::ia(:) - integer(kind=int32),intent(in)::ja(:) - integer(kind=int32),intent(in)::xadj(:),adjncy(:) - integer(kind=int32),intent(in)::perm(:) !Ap(i,:)=A(perm(i),:) - real(kind=wp),intent(inout)::a(:) - real(kind=real64),intent(inout)::time(:) - logical,intent(in)::lspainv - - integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:) - real(kind=wp),allocatable,intent(out)::xspars(:),diag(:) - -#if (_VERBOSE >2) - integer(kind=int32)::i -#endif - integer(kind=int32)::nnode - integer(kind=int32)::maxnode - integer(kind=int32)::maxsub,flag,maxlnz - integer(kind=int32)::rank - integer(kind=int32),allocatable::inode(:) -!$ real(kind=real64)::t1 - - !symbolic factorization - !$ t1=omp_get_wtime() - call symbolicfact(neqns,ia(neqns+1)-1,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag) - !$ time(1)=omp_get_wtime()-t1 - !$ t1=omp_get_wtime() -#if (_VERBOSE >0) - !$ write(*,'(1x,a,t30,a,g0)')'CRS Symb. fact.',': Elapsed time = ',time(1) -#endif - - call computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm) - !$ time(2)=omp_get_wtime()-t1 - !$ t1=omp_get_wtime() -#if (_VERBOSE >0) - !$ write(*,'(1x,a,t30,a,g0)')'CRS Compute xspars',': Elapsed time = ',time(2) -#endif - - !super following Karin Meyer - allocate(inode(neqns+1)) !to allow diagonal matrices (for which the number of super-nodes is equal to the number of equations - call super_nodes(mssn,neqns,xlnz,xnzsub,nzsub,nnode,inode,maxnode) - !$ time(3)=omp_get_wtime()-t1 - !$ t1=omp_get_wtime() -#if (_VERBOSE >0) - !$ write(*,'(1x,a,t30,a,g0)')'CRS Super nodes',': Elapsed time = ',time(3) -#endif - - ! Cholesky factorization - call super_gsfct(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode,rank) - !$ time(4)=omp_get_wtime()-t1 - !$ t1=omp_get_wtime() -#if (_VERBOSE >0) - !$ write(*,'(1x,a,t30,a,g0)')'CRS Chol. fact.',': Elapsed time = ',time(4) -#endif - - if(lspainv)then - ! Matrix inverse - call super_sparsinv(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode) - !$ time(5)=omp_get_wtime()-t1 - !$ t1=omp_get_wtime() -#if (_VERBOSE >0) - !$ write(*,'(1x,a,t30,a,g0)')'CRS Inversion',': Elapsed time = ',time(5) -#endif - endif - -#if (_VERBOSE >0) - write(*,'(/2x,a,i0)')'Flag symbolic factorization : ',flag - !write(*,'(2x,a,i0)')'Number of non-zero in the facor: ',maxlnz - write(*,'(2x,a,i0)')'Number of super-nodes : ',nnode - write(*,'(2x,a,i0)')'Min size of super-nodes : ',mssn - write(*,'(2x,a,i0)')'Max size of super-nodes : ',maxnode - write(*,'(2x,a,i0/)')'Rank of the matrix : ',rank -#endif -#if (_VERBOSE >2) - do i=1,nnode - write(*,'(1x,4(a,i8))') 'node',i,' from row', inode(i+1)+1,' to row', inode(i),' size ',inode(i)-inode(i+1) - end do -#endif - -end subroutine - -!PRIVATE -subroutine symbolicfact(neqns,nnzeros,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag) - integer(kind=int32),intent(in)::neqns,nnzeros - integer(kind=int32),intent(in)::xadj(:),adjncy(:),perm(:) - integer(kind=int32),intent(out)::maxlnz,maxsub,flag - integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:) - - integer(kind=int32)::i,maxsubinit - integer(kind=int32),allocatable::invp(:),rchlnk(:),mrglnk(:),marker(:) - - allocate(invp(size(perm))) - do i=1,size(perm) - invp(perm(i))=i - enddo - - allocate(xlnz(neqns+1),xnzsub(neqns+1)) - allocate(rchlnk(neqns),mrglnk(neqns),marker(neqns)) - xlnz=0 - xnzsub=0 - rchlnk=0 - mrglnk=0 - marker=0 - !maxsub=ia(neqns+1)-1 - maxsub=nnzeros - - do - flag=0 - maxsubinit=maxsub - if(allocated(nzsub))deallocate(nzsub) - allocate(nzsub(maxsub)) - nzsub=0 - call smbfct(neqns,xadj,adjncy,perm,invp,xlnz,maxlnz,xnzsub,nzsub,maxsub,& - rchlnk,mrglnk,marker,flag& - ) - if(maxsub.ne.maxsubinit)cycle - if(flag.eq.0)exit - maxsub=maxsub*2 - enddo - deallocate(rchlnk,mrglnk,marker) - -end subroutine - subroutine super_nodes(mssn, neqns, xlnz, xnzsub, ixsub, nnode, inode,maxnode) integer(kind=int32),intent(inout)::mssn integer(kind=int32),intent(in)::neqns @@ -316,7 +48,7 @@ subroutine super_nodes(mssn, neqns, xlnz, xnzsub, ixsub, nnode, inode,maxnode) integer(kind=int32)::minnode real(kind=wp)::xx - ! establish boundaries between diaggonal blocks 100% full + ! find boundaries between diagonal blocks 100% full ilast = neqns nnode = 0 inode = 0 @@ -331,7 +63,7 @@ subroutine super_nodes(mssn, neqns, xlnz, xnzsub, ixsub, nnode, inode,maxnode) if( kk > ilast ) exit n = n + 1 end do - xx = dble( n ) / dble( ilast - i ) + xx = real( n, kind=wp ) / real( ilast - i, kind=wp ) if( xx < 1._wp .and. (ilast-i).ge.mssn ) then nnode = nnode +1 minnode=min(minnode,ilast-i) @@ -797,204 +529,6 @@ subroutine super_sparsinv(neqns,xlnz,xspars,xnzsub,ixsub,diag,nnode,inode) end subroutine -pure subroutine computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm) - integer(kind=int32),intent(in)::neqns,maxlnz - integer(kind=int32),intent(in)::ia(:),ja(:) - integer(kind=int32),intent(in)::perm(:) - integer(kind=int32),intent(in)::xlnz(:),nzsub(:),xnzsub(:) - real(kind=wp),intent(in)::a(:) - real(kind=wp),intent(out),allocatable::xspars(:),diag(:) - - integer(kind=int32)::irow,iirow,icol,i,j,k - - allocate(xspars(maxlnz),diag(neqns)) - xspars=0._wp - diag=0._wp - - do i=1,neqns - irow=perm(i) - diag(i)=a(ia(irow)) - do k=xlnz(i),xlnz(i+1)-1 - iirow=irow - icol=perm(nzsub(xnzsub(i)+k-xlnz(i))) - if(iirow.gt.icol)then - iirow=icol - icol=irow - endif - do j=ia(iirow)+1,ia(iirow+1)-1 - if(ja(j).eq.icol)then - xspars(k)=a(j) - exit - endif - enddo - enddo - enddo - -end subroutine - -pure subroutine converttoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm) - integer(kind=int32),intent(in)::neqns - integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:) - integer(kind=int32),intent(in)::ia(:),ja(:),perm(:) - real(kind=wp),intent(in)::xspars(:),diag(:) - real(kind=wp),intent(inout):: a(:) - - integer(kind=int32)::irow,ksub,i,icol - integer(kind=int32)::pirow,ppirow,picol,ip - - do irow = 1, neqns - pirow=perm(irow) - a(ia(pirow))=diag(irow) - ksub = xnzsub(irow) - do i = xlnz(irow), xlnz(irow+1)-1 - ppirow=pirow - icol = ixsub(ksub) - picol=perm(icol) - ksub = ksub + 1 - if(ppirow.gt.picol)then - ppirow=picol - picol=pirow - endif - intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1 - if(ja(ip).eq.picol)then - a(ip)=xspars(i) - exit intloop - endif - enddo intloop - end do - end do - -end subroutine - -pure subroutine converttoija_noperm(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm) - integer(kind=int32),intent(in)::neqns - integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:) - integer(kind=int32),intent(in)::perm(:) - integer(kind=int32),intent(inout)::ia(:),ja(:) - real(kind=wp),intent(in)::xspars(:),diag(:) - real(kind=wp),intent(out):: a(:) - - integer(kind=int32)::irow,ksub,i,icol - integer(kind=int32)::pirow,ppirow,picol,ip - integer(kind=int32)::nel - integer(kind=int32),allocatable::iperm(:) - integer(kind=int32),allocatable::iia(:),jja(:) - - - !inefficient but it works - - !1. Permute ia and ja - allocate(iperm,source=perm) - do i=1,neqns - iperm(perm(i))=i - enddo - nel=ia(neqns+1)-1 - allocate(iia(nel),jja(nel)) - - do irow=1,neqns - pirow=iperm(irow) - do i=ia(irow),ia(irow+1)-1 - if(pirow.lt.iperm(ja(i)))then - iia(i)=pirow - jja(i)=iperm(ja(i)) - else - iia(i)=iperm(ja(i)) - jja(i)=pirow - endif - enddo - enddo - - ia=0 - ia(1)=1 - do i=1,nel - ia(iia(i)+1)=ia(iia(i)+1)+1 - enddo - - ja=0 - do i=1,neqns - ia(i+1)=ia(i+1)+ia(i) - ja(ia(i))=i - enddo - - iperm=1 - do i=1,nel - if(iia(i).ne.jja(i))then - ja(ia(iia(i))+iperm(iia(i)))=jja(i) - iperm(iia(i))=iperm(iia(i))+1 - endif - enddo - - !2. Replace a - a=0._wp - do irow = 1, neqns - pirow=irow - a(ia(pirow))=diag(irow) - ksub = xnzsub(irow) - do i = xlnz(irow), xlnz(irow+1)-1 - ppirow=pirow - icol = ixsub(ksub) - picol=icol - ksub = ksub + 1 - if(ppirow.gt.picol)then - ppirow=picol - picol=pirow - endif - intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1 - if(ja(ip).eq.picol)then - a(ip)=xspars(i) - exit intloop - endif - enddo intloop - end do - end do - -end subroutine - -pure subroutine convertfactortoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a) - integer(kind=int32),intent(in)::neqns - integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:) - integer(kind=int32),intent(inout)::ia(:) - integer(kind=int32),intent(out),allocatable::ja(:) - real(kind=wp),intent(in)::xspars(:),diag(:) - real(kind=wp),intent(out),allocatable::a(:) - - integer(kind=int32)::j,irow,ksub,icol - - allocate(ja(size(xspars)+neqns), source=0) - allocate(a(size(xspars)+neqns), source=0._wp) - - do irow=1,neqns - ia(irow)=xlnz(irow)+irow-1 - ja(ia(irow))=irow - a(ia(irow))=diag(irow) - ksub=xnzsub(irow) - do j=xlnz(irow),xlnz(irow+1)-1 - icol=ixsub(ksub) - ksub=ksub+1 - ja(j+irow)=icol - a(j+irow)=xspars(j) - enddo - enddo - ia(neqns+1)=xlnz(neqns+1)+neqns - -end subroutine - -subroutine writetime(unlog,time,a) - integer(kind=int32)::unlog - real(kind=real64),intent(in)::time(:) - character(len=*),intent(in)::a - - write(unlog,'(/a)')' CRS MATRIX '//trim(a) - !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Symbolic factorization',':',time(1),' s' - !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Setup of tmp arrays',':',time(2),' s' - !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Node determination',':',time(3),' s' - !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Cholesky factorization',':',time(4),' s' - !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Matrix inversion',':',time(5),' s' - !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Conversion to CRS',':',time(6),' s' - !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Total time',':',sum(time),' s' - -end subroutine - pure subroutine split_inode(inode, nnode, jnode) integer(kind=int32), intent(inout) :: inode(:) integer(kind=int32), intent(inout) :: nnode diff --git a/src/modsparse.f90 b/src/modsparse.f90 index 1592a92..e26279c 100644 --- a/src/modsparse.f90 +++ b/src/modsparse.f90 @@ -447,10 +447,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog private !> @brief Adds the value val to mat(row,col); e.g., call mat\%add(row,col,val) procedure,public::add=>add_crs -#if (_SPAINV==1) !> @brief Computes and replaces the sparse matrix by the (complete) Cholesky factor procedure,public::chol=>getchol_crs -#endif !> @brief Deallocates the sparse matrix and sets to default values procedure,public::destroy=>destroy_crs procedure::diag_vect_crs @@ -460,10 +458,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog generic,public::diag=>diag_vect_crs,diag_mat_crs !> @brief Returns the value of mat(row,col); e.g., ...=mat\%get(row,col) procedure,public::get=>get_crs -#if (_SPAINV==1) !> @brief Computes and replaces the sparse matrix by the (complete) LDLt (L is stored in the upper triangle and D in the diagonal) procedure,public::getldlt=>getldlt_crs -#endif !> @brief Gets memory used procedure,public::getmem=>getmem_crs !> @brief Initializes the vectors ia,ja,and a from external vectors @@ -476,10 +472,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog procedure,public::get_nzval=>get_nzval_crs !> @brief Get diagonal elements of an approximate inverse using Harville (1999) procedure,public::harville=>harville_crs -#if (_SPAINV==1) !> @brief Computes and replaces the sparse matrix by an incomplete Cholesky factor procedure,public::ichol=>getichol_crs -#endif !> @brief Iniates crssparse procedure,public::init=>constructor_sub_crs !> @brief Solver with a triangular factor (e.g., a Cholesky factor or an incomplete Cholesky factor) @@ -522,10 +516,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog generic,public::solve=>solve_crs_vector,solve_crs_array !> @brief Sorts the elements in a ascending order within a row procedure,public::sort=>sort_crs -#if (_SPAINV==1) !> @brief Computes and replaces by the sparse inverse procedure,public::spainv=>getspainv_crs -#endif !> @brief Gets a submatrix from a sparse matrix procedure,public::submatrix=>submatrix_crs !> @brief Gets a dense submatrix from a sparse matrix @@ -627,7 +619,6 @@ module function totalnumberofelements_crs(sparse) result(nel) class(crssparse),intent(in)::sparse integer(kind=int64)::nel end function -#if (_SPAINV==1) !**GET (COMPLETE) CHOLESKY FACTOR module subroutine getchol_crs(sparse,minsizenode) class(crssparse),intent(inout)::sparse @@ -638,7 +629,6 @@ module subroutine getldlt_crs(sparse,minsizenode) class(crssparse),intent(inout)::sparse integer(kind=int32),intent(in),optional::minsizenode end subroutine -#endif #if (_METIS==1) !**GET ORDER module function getordering_crs(sparse& @@ -650,7 +640,6 @@ module function getordering_crs(sparse& integer(kind=int32),allocatable::perm(:) end function #endif -#if (_SPAINV==1) !**GET INCOMPLETE CHOLESKY FACTOR module subroutine getichol_crs(sparse,minsizenode) class(crssparse),intent(inout)::sparse @@ -661,7 +650,6 @@ module subroutine getspainv_crs(sparse,minsizenode) class(crssparse),intent(inout)::sparse integer(kind=int32),intent(in),optional::minsizenode end subroutine -#endif #if (_PARDISO==1) pure module function isdecomposed_crs(sparse) result(ll) class(crssparse),intent(in)::sparse diff --git a/src/modsparse_crs.f90 b/src/modsparse_crs.f90 index d011f7c..8846b40 100644 --- a/src/modsparse_crs.f90 +++ b/src/modsparse_crs.f90 @@ -5,9 +5,7 @@ , mkl_scsrmm, mkl_dcsrmm & , mkl_scsrtrsv, mkl_dcsrtrsv & , mkl_scsrsymv, mkl_dcsrsymv -#if (_SPAINV==1) - use modspainv, only: get_chol, get_ichol, get_spainv -#endif + use modsparse_inv, only: get_chol, get_ichol, get_spainv #if (_PARDISO==1) use modvariablepardiso, only: checkpardiso, pardiso_variable #endif @@ -407,7 +405,6 @@ module function totalnumberofelements_crs(sparse) result(nel) end function -#if (_SPAINV==1) !**GET (COMPLETE) CHOLESKY FACTOR module subroutine getchol_crs(sparse,minsizenode) class(crssparse),intent(inout)::sparse @@ -517,7 +514,6 @@ module subroutine getldlt_crs(sparse,minsizenode) #endif end subroutine -#endif #if (_METIS==1) !**GET ORDER @@ -609,7 +605,6 @@ module function getordering_crs(sparse& end function #endif -#if (_SPAINV==1) !**GET INCOMPLETE CHOLESKY FACTOR module subroutine getichol_crs(sparse,minsizenode) class(crssparse),intent(inout)::sparse @@ -722,7 +717,6 @@ module subroutine getspainv_crs(sparse,minsizenode) #endif end subroutine -#endif #if (_PARDISO==1) !**RESET PARDISO MEMORY diff --git a/src/modsparse_inv.f90 b/src/modsparse_inv.f90 new file mode 100644 index 0000000..0cd1681 --- /dev/null +++ b/src/modsparse_inv.f90 @@ -0,0 +1,489 @@ +!> Module containing subroutines for sparse inverses of symmetric positive definite matrices + +!> @todo Still raw and not very efficient + +module modsparse_inv +#if (_DP==0) + use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real32 +#else + use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real64 +#endif +#if (_SPAINV==1) + use modspainv, only: super_nodes, super_gsfct, super_sparsinv +#endif + !$ use omp_lib + implicit none + private + public::get_chol,get_ichol,get_spainv + + integer(kind=int32),parameter::minsizesupernode=0 !values other than 0 (e.g., 256) may give troubles + + interface get_chol + module procedure get_chol_crs + end interface + + interface get_ichol + module procedure get_ichol_crs + end interface + + interface get_spainv + module procedure get_spainv_crs + end interface + + interface + subroutine smbfct(neqns,xadj,adjncy,perm,invp,& + xlnz,maxlnz,xnzsub,nzsub,maxsub,& + rchlnk,mrglnk,marker,flag& + ) + integer::neqns + integer::maxsub,flag,maxlnz + integer::xadj(*),adjncy(*) + integer::perm(*),invp(*) + integer::xlnz(*),xnzsub(*),nzsub(*),rchlnk(*),mrglnk(*),marker(*) + end subroutine + end interface + +contains + +!PUBLIC +!Should return the Cholesky factor in the permuted order +subroutine get_ichol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un) + integer(kind=int32),intent(inout)::ia(:) + integer(kind=int32),intent(inout)::ja(:) + integer(kind=int32),intent(in)::xadj(:),adjncy(:) + integer(kind=int32),intent(in)::perm(:) !Ap(i,:)=A(perm(i),:) + integer(kind=int32),intent(in),optional::minsizenode + integer(kind=int32),intent(in),optional::un + real(kind=wp),intent(inout)::a(:) + + integer(kind=int32)::unlog,neqns + integer(kind=int32)::mssn + integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:) + real(kind=wp),allocatable::xspars(:),diag(:) + !$ real(kind=real64)::t1 + real(kind=real64)::time(6) + + mssn=minsizesupernode + if(present(minsizenode))mssn=minsizenode + + unlog=output_unit + if(present(un))unlog=un + + time=0._real64 + + neqns=size(ia)-1 + + call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) + + ! Cholesky factorization + !Convert to ija + !$ t1=omp_get_wtime() + call converttoija_noperm(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm) + !$ time(6)=omp_get_wtime()-t1 + + call writetime(unlog,time,'SPARSE CHOL FACT.') + +end subroutine + +subroutine get_chol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un) + integer(kind=int32),intent(inout),allocatable::ia(:) + integer(kind=int32),intent(inout),allocatable::ja(:) + integer(kind=int32),intent(in)::xadj(:),adjncy(:) + integer(kind=int32),intent(in)::perm(:) !Ap(i,:)=A(perm(i),:) + integer(kind=int32),intent(in),optional::minsizenode + integer(kind=int32),intent(in),optional::un + real(kind=wp),intent(inout),allocatable::a(:) + + integer(kind=int32)::unlog,neqns + integer(kind=int32)::mssn + integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:) + real(kind=wp),allocatable::xspars(:),diag(:) + !$ real(kind=real64)::t1 + real(kind=real64)::time(6) + + mssn=minsizesupernode + if(present(minsizenode))mssn=minsizenode + + unlog=output_unit + if(present(un))unlog=un + + time=0._real64 + + neqns=size(ia)-1 + + call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) + + !Convert to ija + !$ t1=omp_get_wtime() + call convertfactortoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a) + !$ time(6)=omp_get_wtime()-t1 + + call writetime(unlog,time,'CHOL FACT.') + +end subroutine + +subroutine get_spainv_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un) + integer(kind=int32),intent(in)::ia(:) + integer(kind=int32),intent(in)::ja(:) + integer(kind=int32),intent(in)::xadj(:),adjncy(:) + integer(kind=int32),intent(inout)::perm(:) !Ap(i,:)=A(perm(i),:) + integer(kind=int32),intent(in),optional::minsizenode + integer(kind=int32),intent(in),optional::un + real(kind=wp),intent(inout)::a(:) + + integer(kind=int32)::unlog,neqns + integer(kind=int32)::mssn + integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:) + real(kind=wp),allocatable::xspars(:),diag(:) + !$ real(kind=real64)::t1 + real(kind=real64)::time(6) + + mssn=minsizesupernode + if(present(minsizenode))mssn=minsizenode + + unlog=output_unit + if(present(un))unlog=un + + time=0._real64 + + neqns=size(ia)-1 + + call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.true.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) + + !Convert to ija + !$ t1=omp_get_wtime() + call converttoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm) + !$ time(6)=omp_get_wtime()-t1 + + call writetime(unlog,time,'INVERSION') + +end subroutine + +subroutine get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,lspainv,xlnz,xspars,xnzsub,nzsub,diag,mssn,time) + integer(kind=int32),intent(in)::neqns + integer(kind=int32),intent(inout)::mssn + integer(kind=int32),intent(in)::ia(:) + integer(kind=int32),intent(in)::ja(:) + integer(kind=int32),intent(in)::xadj(:),adjncy(:) + integer(kind=int32),intent(in)::perm(:) !Ap(i,:)=A(perm(i),:) + real(kind=wp),intent(inout)::a(:) + real(kind=real64),intent(inout)::time(:) + logical,intent(in)::lspainv + + integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:) + real(kind=wp),allocatable,intent(out)::xspars(:),diag(:) + +#if (_VERBOSE >2) + integer(kind=int32)::i +#endif + integer(kind=int32)::nnode + integer(kind=int32)::maxnode + integer(kind=int32)::maxsub,flag,maxlnz + integer(kind=int32)::rank + integer(kind=int32),allocatable::inode(:) +!$ real(kind=real64)::t1 + + !symbolic factorization + !$ t1=omp_get_wtime() + call symbolicfact(neqns,ia(neqns+1)-1,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag) + !$ time(1)=omp_get_wtime()-t1 + !$ t1=omp_get_wtime() +#if (_VERBOSE >0) + !$ write(*,'(1x,a,t30,a,g0)')'CRS Symb. fact.',': Elapsed time = ',time(1) +#endif + + call computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm) + !$ time(2)=omp_get_wtime()-t1 + !$ t1=omp_get_wtime() +#if (_VERBOSE >0) + !$ write(*,'(1x,a,t30,a,g0)')'CRS Compute xspars',': Elapsed time = ',time(2) +#endif + +#if (_SPAINV==1) + allocate(inode(neqns+1)) !to allow diagonal matrices (for which the number of super-nodes is equal to the number of equations + call super_nodes(mssn,neqns,xlnz,xnzsub,nzsub,nnode,inode,maxnode) + !$ time(3)=omp_get_wtime()-t1 + !$ t1=omp_get_wtime() +#if (_VERBOSE >0) + !$ write(*,'(1x,a,t30,a,g0)')'CRS Super nodes',': Elapsed time = ',time(3) +#endif + + ! Cholesky factorization + call super_gsfct(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode,rank) + !$ time(4)=omp_get_wtime()-t1 + !$ t1=omp_get_wtime() +#if (_VERBOSE >0) + !$ write(*,'(1x,a,t30,a,g0)')'CRS Chol. fact.',': Elapsed time = ',time(4) +#endif + + if(lspainv)then + ! Matrix inverse + call super_sparsinv(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode) + !$ time(5)=omp_get_wtime()-t1 + !$ t1=omp_get_wtime() +#if (_VERBOSE >0) + !$ write(*,'(1x,a,t30,a,g0)')'CRS Inversion',': Elapsed time = ',time(5) +#endif + endif + +#if (_VERBOSE >0) + write(*,'(/2x,a,i0)')'Flag symbolic factorization : ',flag + !write(*,'(2x,a,i0)')'Number of non-zero in the facor: ',maxlnz + write(*,'(2x,a,i0)')'Number of super-nodes : ',nnode + write(*,'(2x,a,i0)')'Min size of super-nodes : ',mssn + write(*,'(2x,a,i0)')'Max size of super-nodes : ',maxnode + write(*,'(2x,a,i0/)')'Rank of the matrix : ',rank +#endif +#if (_VERBOSE >2) + do i=1,nnode + write(*,'(1x,4(a,i8))') 'node',i,' from row', inode(i+1)+1,' to row', inode(i),' size ',inode(i)-inode(i+1) + end do +#endif + +#else + error stop 'SPAINV: not enabled' +#endif + +end subroutine + +!PRIVATE +subroutine symbolicfact(neqns,nnzeros,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag) + integer(kind=int32),intent(in)::neqns,nnzeros + integer(kind=int32),intent(in)::xadj(:),adjncy(:),perm(:) + integer(kind=int32),intent(out)::maxlnz,maxsub,flag + integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:) + + integer(kind=int32)::i,maxsubinit + integer(kind=int32),allocatable::invp(:),rchlnk(:),mrglnk(:),marker(:) + + allocate(invp(size(perm))) + do i=1,size(perm) + invp(perm(i))=i + enddo + + allocate(xlnz(neqns+1),xnzsub(neqns+1)) + allocate(rchlnk(neqns),mrglnk(neqns),marker(neqns)) + xlnz=0 + xnzsub=0 + rchlnk=0 + mrglnk=0 + marker=0 + !maxsub=ia(neqns+1)-1 + maxsub=nnzeros + + do + flag=0 + maxsubinit=maxsub + if(allocated(nzsub))deallocate(nzsub) + allocate(nzsub(maxsub)) + nzsub=0 + call smbfct(neqns,xadj,adjncy,perm,invp,xlnz,maxlnz,xnzsub,nzsub,maxsub,& + rchlnk,mrglnk,marker,flag& + ) + if(maxsub.ne.maxsubinit)cycle + if(flag.eq.0)exit + maxsub=maxsub*2 + enddo + deallocate(rchlnk,mrglnk,marker) + +end subroutine + +pure subroutine computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm) + integer(kind=int32),intent(in)::neqns,maxlnz + integer(kind=int32),intent(in)::ia(:),ja(:) + integer(kind=int32),intent(in)::perm(:) + integer(kind=int32),intent(in)::xlnz(:),nzsub(:),xnzsub(:) + real(kind=wp),intent(in)::a(:) + real(kind=wp),intent(out),allocatable::xspars(:),diag(:) + + integer(kind=int32)::irow,iirow,icol,i,j,k + + allocate(xspars(maxlnz),diag(neqns)) + xspars=0._wp + diag=0._wp + + do i=1,neqns + irow=perm(i) + diag(i)=a(ia(irow)) + do k=xlnz(i),xlnz(i+1)-1 + iirow=irow + icol=perm(nzsub(xnzsub(i)+k-xlnz(i))) + if(iirow.gt.icol)then + iirow=icol + icol=irow + endif + do j=ia(iirow)+1,ia(iirow+1)-1 + if(ja(j).eq.icol)then + xspars(k)=a(j) + exit + endif + enddo + enddo + enddo + +end subroutine + +pure subroutine converttoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm) + integer(kind=int32),intent(in)::neqns + integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:) + integer(kind=int32),intent(in)::ia(:),ja(:),perm(:) + real(kind=wp),intent(in)::xspars(:),diag(:) + real(kind=wp),intent(inout):: a(:) + + integer(kind=int32)::irow,ksub,i,icol + integer(kind=int32)::pirow,ppirow,picol,ip + + do irow = 1, neqns + pirow=perm(irow) + a(ia(pirow))=diag(irow) + ksub = xnzsub(irow) + do i = xlnz(irow), xlnz(irow+1)-1 + ppirow=pirow + icol = ixsub(ksub) + picol=perm(icol) + ksub = ksub + 1 + if(ppirow.gt.picol)then + ppirow=picol + picol=pirow + endif + intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1 + if(ja(ip).eq.picol)then + a(ip)=xspars(i) + exit intloop + endif + enddo intloop + end do + end do + +end subroutine + +pure subroutine converttoija_noperm(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm) + integer(kind=int32),intent(in)::neqns + integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:) + integer(kind=int32),intent(in)::perm(:) + integer(kind=int32),intent(inout)::ia(:),ja(:) + real(kind=wp),intent(in)::xspars(:),diag(:) + real(kind=wp),intent(out):: a(:) + + integer(kind=int32)::irow,ksub,i,icol + integer(kind=int32)::pirow,ppirow,picol,ip + integer(kind=int32)::nel + integer(kind=int32),allocatable::iperm(:) + integer(kind=int32),allocatable::iia(:),jja(:) + + + !inefficient but it works + + !1. Permute ia and ja + allocate(iperm,source=perm) + do i=1,neqns + iperm(perm(i))=i + enddo + nel=ia(neqns+1)-1 + allocate(iia(nel),jja(nel)) + + do irow=1,neqns + pirow=iperm(irow) + do i=ia(irow),ia(irow+1)-1 + if(pirow.lt.iperm(ja(i)))then + iia(i)=pirow + jja(i)=iperm(ja(i)) + else + iia(i)=iperm(ja(i)) + jja(i)=pirow + endif + enddo + enddo + + ia=0 + ia(1)=1 + do i=1,nel + ia(iia(i)+1)=ia(iia(i)+1)+1 + enddo + + ja=0 + do i=1,neqns + ia(i+1)=ia(i+1)+ia(i) + ja(ia(i))=i + enddo + + iperm=1 + do i=1,nel + if(iia(i).ne.jja(i))then + ja(ia(iia(i))+iperm(iia(i)))=jja(i) + iperm(iia(i))=iperm(iia(i))+1 + endif + enddo + + !2. Replace a + a=0._wp + do irow = 1, neqns + pirow=irow + a(ia(pirow))=diag(irow) + ksub = xnzsub(irow) + do i = xlnz(irow), xlnz(irow+1)-1 + ppirow=pirow + icol = ixsub(ksub) + picol=icol + ksub = ksub + 1 + if(ppirow.gt.picol)then + ppirow=picol + picol=pirow + endif + intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1 + if(ja(ip).eq.picol)then + a(ip)=xspars(i) + exit intloop + endif + enddo intloop + end do + end do + +end subroutine + +pure subroutine convertfactortoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a) + integer(kind=int32),intent(in)::neqns + integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:) + integer(kind=int32),intent(inout)::ia(:) + integer(kind=int32),intent(out),allocatable::ja(:) + real(kind=wp),intent(in)::xspars(:),diag(:) + real(kind=wp),intent(out),allocatable::a(:) + + integer(kind=int32)::j,irow,ksub,icol + + allocate(ja(size(xspars)+neqns), source=0) + allocate(a(size(xspars)+neqns), source=0._wp) + + do irow=1,neqns + ia(irow)=xlnz(irow)+irow-1 + ja(ia(irow))=irow + a(ia(irow))=diag(irow) + ksub=xnzsub(irow) + do j=xlnz(irow),xlnz(irow+1)-1 + icol=ixsub(ksub) + ksub=ksub+1 + ja(j+irow)=icol + a(j+irow)=xspars(j) + enddo + enddo + ia(neqns+1)=xlnz(neqns+1)+neqns + +end subroutine + +subroutine writetime(unlog,time,a) + integer(kind=int32)::unlog + real(kind=real64),intent(in)::time(:) + character(len=*),intent(in)::a + + write(unlog,'(/a)')' CRS MATRIX '//trim(a) + !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Symbolic factorization',':',time(1),' s' + !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Setup of tmp arrays',':',time(2),' s' + !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Node determination',':',time(3),' s' + !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Cholesky factorization',':',time(4),' s' + !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Matrix inversion',':',time(5),' s' + !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Conversion to CRS',':',time(6),' s' + !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Total time',':',sum(time),' s' + +end subroutine + +end module