diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c6df298..7aa59d2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -26,6 +26,7 @@ list(
   "${dir}/modsparse_crs.f90"
   "${dir}/modsparse.f90"
   "${dir}/modsparse_gen.f90"
+  "${dir}/modsparse_inv.f90"
   "${dir}/modsparse_ll.f90"
   "${dir}/modsparse_metisgraph.f90"
   "${dir}/modsparse_mkl.f90"
diff --git a/src/Makefile b/src/Makefile
index c7af528..6865b08 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -9,7 +9,7 @@ ifeq ($(METISENABLE),1)
 endif
 
 ifeq ($(SPAINVENABLE),1)
- OBJSPAINV = sgtrsm.o dgtrsm.o smbfct.o modspainv.o
+ OBJSPAINV = sgtrsm.o dgtrsm.o modspainv.o
 endif
 
 
@@ -17,6 +17,7 @@ all: libsparse.a
 
 OBJ = modcommon.o \
       modrandom.o \
+      modsparse_inv.o  smbfct.o\
       $(OBJPARDISO) $(OBJMETIS) $(OBJSPAINV) \
       modsparse_mkl.o modhash.o modsparse.o modsparse_gen.o modsparse_coo.o \
       modsparse_crs64.o \
@@ -36,6 +37,11 @@ libsparse.a: $(OBJ)
 modsparse_coo.o: modsparse.o modhash.o
 modsparse_crs.o: modsparse_mkl.o modsparse.o $(OBJSPAINV) $(OBJPARDISO) $(OBJMETIS)
 modsparse_crs64.o: modsparse.o $(OBJPARDISO)
+ifeq ($(SPAINVENABLE),1)
+modsparse_inv.o: modspainv.o smbfct.o
+else
+modsparse_inv.o: smbfct.o
+endif
 modspainv.o: modcommon.o modsparse_mkl.o
 modsparse.o: $(OBJPARDISO)
 modsparse_gen.o: modsparse.o
diff --git a/src/gsfct.f b/src/gsfct.f
new file mode 100644
index 0000000..324e3df
--- /dev/null
+++ b/src/gsfct.f
@@ -0,0 +1,144 @@
+c#######################################################################
+c#      this is subroutine "GSFCT" as given in : 'Computer Solution    #
+c#      of Sparse Linear Systems' by A. George, J. Liu and             #
+c#      E. Ng, 1994, pp. 180-183;                                      #
+c#######################################################################
+
+C----- SUBROUTINE GSFCT
+C***************************************************************
+C***************************************************************
+C******     GSFCT ..... GENERAL SPARSE SYMMETRIC FACT     ******
+C***************************************************************
+C***************************************************************
+C
+C     PURPOSE - THIS SUBROUTINE PERFORMS THE SYMMETRIC
+C        FACTORIZATION FOR A GENERAL SPARSE SYSTEM, STORED IN
+C        THE COMPRESSED SUBSCRIPT DATA FORMAT.
+C
+C     INPUT PARAMETERS -
+C        NEQNS - NUMBER OF EQUATIONS.
+C        XLNZ - INDEX VECTOR FOR LNZ. XLNZ(I) POINTS TO THE
+C               START OF NONZEROS IN COLUMN I OF FACTOR L.
+C        (XNZSUB, NZSUB) - THE COMPRESSED SUBSCRIPT DATA
+C               STRUCTURE FOR FACTOR L.
+C
+C     UPDATED PARAMETERS -
+C        LNZ - ON INPUT, CONTAINS NONZEROS OF A, AND ON
+C              RETURN, THE NONZEROS OF L.
+C        DIAG - THE DIAGONAL OF L OVERWRITES THAT OF A.
+C        IFLAG - THE ERROR FLAG. IT IS SET TO 1 IF A ZERO OR
+C               NEGATIVE SQUARE ROOT OCCURS DURING THE
+C               FACTORIZATION.
+C        OPS - A DOUBLE PRECISION COMMON PARAMETER THAT IS
+C              INCREMENTED BY THE NUMBER OF OPERATIONS
+C              PERFORMED BY THE SUBROUTINE.
+C
+C     WORKING PARAMETERS -
+C        LINK - AT STEP J, THE LIST IN
+C                  LINK(J), LINK(LINK(J)), ...........
+C               CONSISTS OF THOSE COLUMNS THAT WILL MODIFY
+C               THE COLUMN L(*,J).
+C        FIRST - TEMPORARY VECTOR TO POINT TO THE FIRST
+C                NONZERO IN EACH COLUMN THAT WILL BE USED
+C                NEXT FOR MODIFICATION.
+C        TEMP - A TEMPORARY VECTOR TO ACCUMULATE MODIFICATIONS.
+C
+C***************************************************************
+C
+      SUBROUTINE GSFCT ( NEQNS, XLNZ, LNZ, XNZSUB, NZSUB, DIAG,
+     1                   LINK, FIRST, TEMP, IFLAG )
+C
+C***************************************************************
+C
+         DOUBLE PRECISION COUNT, OPS
+         COMMON /SPKOPS/ OPS
+         REAL DIAG(1), LNZ(1), TEMP(1), DIAGJ, LJK
+         INTEGER LINK(1), NZSUB(1)
+         INTEGER FIRST(1), XLNZ(1), XNZSUB(1),
+     1           I, IFLAG, II, ISTOP, ISTRT, ISUB, J,
+     1           K, KFIRST, NEQNS, NEWK
+C
+C***************************************************************
+C
+C       ------------------------------
+C       INITIALIZE WORKING VECTORS ...
+C       ------------------------------
+        DO 100 I = 1, NEQNS
+           LINK(I) = 0
+           TEMP(I) = 0.0E0
+  100   CONTINUE
+C       --------------------------------------------
+C       COMPUTE COLUMN L(*,J) FOR J = 1,...., NEQNS.
+C       --------------------------------------------
+        DO 600 J = 1, NEQNS
+C          -------------------------------------------
+C          FOR EACH COLUMN L(*,K) THAT AFFECTS L(*,J).
+C          -------------------------------------------
+           DIAGJ = 0.0E0
+           NEWK = LINK(J)
+  200      K    = NEWK
+           IF ( K .EQ. 0 ) GO TO 400
+              NEWK = LINK(K)
+C             ---------------------------------------
+C             OUTER PRODUCT MODIFICATION OF L(*,J) BY
+C             L(*,K) STARTING AT FIRST(K) OF L(*,K).
+C             ---------------------------------------
+              KFIRST = FIRST(K)
+             LJK     = LNZ(KFIRST)
+             DIAGJ = DIAGJ + LJK*LJK
+             OPS = OPS + 1.0D0
+             ISTRT = KFIRST + 1
+             ISTOP = XLNZ(K+1) - 1
+             IF ( ISTOP .LT. ISTRT ) GO TO 200
+C               ------------------------------------------
+C               BEFORE MODIFICATION, UPDATE VECTORS FIRST,
+C               AND LINK FOR FUTURE MODIFICATION STEPS.
+C               ------------------------------------------
+                FIRST(K) = ISTRT
+                I = XNZSUB(K) + (KFIRST-XLNZ(K)) + 1
+                ISUB = NZSUB(I)
+                LINK(K) = LINK(ISUB)
+                LINK(ISUB) = K
+C               ---------------------------------------
+C               THE ACTUAL MOD IS SAVED IN VECTOR TEMP.
+C               ---------------------------------------
+                DO 300 II = ISTRT, ISTOP
+                   ISUB = NZSUB(I)
+                   TEMP(ISUB) = TEMP(ISUB) + LNZ(II)*LJK
+                   I = I + 1
+  300           CONTINUE
+                COUNT = ISTOP - ISTRT + 1
+                OPS = OPS + COUNT
+             GO TO 200
+C         ----------------------------------------------
+C         APPLY THE MODIFICATIONS ACCUMULATED IN TEMP TO
+C         COLUMN L(*,J).
+C         ----------------------------------------------
+  400     DIAGJ = DIAG(J) - DIAGJ
+          IF ( DIAGJ .LE. 0.0E0 ) GO TO 700
+          DIAGJ = SQRT(DIAGJ)
+          DIAG(J) = DIAGJ
+          ISTRT = XLNZ(J)
+          ISTOP = XLNZ(J+1) - 1
+          IF ( ISTOP .LT. ISTRT ) GO TO 600
+             FIRST(J) = ISTRT
+             I = XNZSUB(J)
+             ISUB = NZSUB(I)
+             LINK(J) = LINK(ISUB)
+             LINK(ISUB) = J
+             DO 500 II = ISTRT, ISTOP
+                ISUB = NZSUB(I)
+                LNZ(II) = ( LNZ(II)-TEMP(ISUB) ) / DIAGJ
+                TEMP(ISUB) = 0.0E0
+                I = I + 1
+  500        CONTINUE
+             COUNT = ISTOP - ISTRT + 1
+             OPS = OPS + COUNT
+  600   CONTINUE
+        RETURN
+C       ------------------------------------------------------
+C       ERROR - ZERO OR NEGATIVE SQUARE ROOT IN FACTORIZATION.
+C       ------------------------------------------------------
+  700   IFLAG = 1
+        RETURN
+        END
diff --git a/src/modspainv.f90 b/src/modspainv.f90
index 56db986..3dc7a4c 100644
--- a/src/modspainv.f90
+++ b/src/modspainv.f90
@@ -1,9 +1,7 @@
 !> Module containing subroutines for sparse inverses of symmetric positive definite matrices
 
-!> @todo Still raw and not very efficient
-
 !Based on Karin Meyer 's code (didgeridoo.une.edu.au/womwiki/doku.php?id=fortran:fortran).
-!The content of Karin Meyer 's wiki is licensed under CC Attribution-Sahre Alike 4.0 International.
+!The content of Karin Meyer 's wiki is licensed under CC Attribution-Share Alike 4.0 International.
 !
 !Support of sparse inverse of SPSD matrices by implementing the S. D. Kachman modifications
 !(https://www.ars.usda.gov/ARSUserFiles/80420530/MTDFREML/MTDFMan.pdf ; Chapter 6)
@@ -12,33 +10,19 @@
 
 module modspainv
 #if (_DP==0)
- use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real32
+ use iso_fortran_env,only:output_unit,int32,real32,real64,wp=>real32
  use modsparse_mkl, only: spotrf, spotri, strsm, ssymm, sgemm
 #else
- use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real64
+ use iso_fortran_env,only:output_unit,int32,real32,real64,wp=>real64
  use modsparse_mkl, only: dpotrf, dpotri, dtrsm, dsymm, dgemm
 #endif
  use modcommon, only: progress
- !$ use omp_lib
- implicit none
+ implicit none(type, external)
  private
- public::get_chol,get_ichol,get_spainv
+ public :: super_nodes, super_gsfct, super_sparsinv
 
- integer(kind=int32),parameter::minsizesupernode=0  !values other than 0 (e.g., 256) may give troubles
  real(kind=wp),parameter::tol=1.e-8_wp
 
- interface get_chol
-  module procedure get_chol_crs
- end interface
-
- interface get_ichol
-  module procedure get_ichol_crs
- end interface
-
- interface get_spainv
-  module procedure get_spainv_crs
- end interface
-
  interface
   SUBROUTINE dgtrsm(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
    !.. Scalar Arguments ..
@@ -50,260 +34,8 @@ SUBROUTINE dgtrsm(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
   end subroutine
  end interface
 
- interface
-  subroutine smbfct(neqns,xadj,adjncy,perm,invp,&
-              xlnz,maxlnz,xnzsub,nzsub,maxsub,&
-              rchlnk,mrglnk,marker,flag&
-              )
-   integer::neqns
-   integer::maxsub,flag,maxlnz
-   integer::xadj(*),adjncy(*)
-   integer::perm(*),invp(*)
-   integer::xlnz(*),xnzsub(*),nzsub(*),rchlnk(*),mrglnk(*),marker(*)
-  end subroutine
- end interface
-
 contains
 
-!PUBLIC
-!Should return the Cholesky factor in the permuted order
-subroutine get_ichol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un)
- integer(kind=int32),intent(inout)::ia(:)
- integer(kind=int32),intent(inout)::ja(:)
- integer(kind=int32),intent(in)::xadj(:),adjncy(:)
- integer(kind=int32),intent(in)::perm(:)  !Ap(i,:)=A(perm(i),:)
- integer(kind=int32),intent(in),optional::minsizenode
- integer(kind=int32),intent(in),optional::un
- real(kind=wp),intent(inout)::a(:)
- 
- integer(kind=int32)::unlog,neqns
- integer(kind=int32)::mssn
- integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:)
- real(kind=wp),allocatable::xspars(:),diag(:)
- !$ real(kind=real64)::t1
- real(kind=real64)::time(6)
-
- mssn=minsizesupernode
- if(present(minsizenode))mssn=minsizenode
-
- unlog=output_unit
- if(present(un))unlog=un
-
- time=0._real64
-
- neqns=size(ia)-1
-
- call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
-
- ! Cholesky factorization
- !Convert to ija
- !$ t1=omp_get_wtime()
- call converttoija_noperm(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm)
- !$ time(6)=omp_get_wtime()-t1
-
- call writetime(unlog,time,'SPARSE CHOL FACT.')
-
-end subroutine
-
-subroutine get_chol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un)
- integer(kind=int32),intent(inout),allocatable::ia(:)
- integer(kind=int32),intent(inout),allocatable::ja(:)
- integer(kind=int32),intent(in)::xadj(:),adjncy(:)
- integer(kind=int32),intent(in)::perm(:)  !Ap(i,:)=A(perm(i),:)
- integer(kind=int32),intent(in),optional::minsizenode
- integer(kind=int32),intent(in),optional::un
- real(kind=wp),intent(inout),allocatable::a(:)
- 
- integer(kind=int32)::unlog,neqns
- integer(kind=int32)::mssn
- integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:)
- real(kind=wp),allocatable::xspars(:),diag(:)
- !$ real(kind=real64)::t1
- real(kind=real64)::time(6)
-
- mssn=minsizesupernode
- if(present(minsizenode))mssn=minsizenode
-
- unlog=output_unit
- if(present(un))unlog=un
-
- time=0._real64
-
- neqns=size(ia)-1
-
- call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
-
- !Convert to ija
- !$ t1=omp_get_wtime()
- call convertfactortoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a)
- !$ time(6)=omp_get_wtime()-t1
-
- call writetime(unlog,time,'CHOL FACT.')
-
-end subroutine
-
-subroutine get_spainv_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un)
- integer(kind=int32),intent(in)::ia(:)
- integer(kind=int32),intent(in)::ja(:)
- integer(kind=int32),intent(in)::xadj(:),adjncy(:)
- integer(kind=int32),intent(inout)::perm(:)  !Ap(i,:)=A(perm(i),:)
- integer(kind=int32),intent(in),optional::minsizenode
- integer(kind=int32),intent(in),optional::un
- real(kind=wp),intent(inout)::a(:)
- 
- integer(kind=int32)::unlog,neqns
- integer(kind=int32)::mssn
- integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:)
- real(kind=wp),allocatable::xspars(:),diag(:)
- !$ real(kind=real64)::t1
- real(kind=real64)::time(6)
-
- mssn=minsizesupernode
- if(present(minsizenode))mssn=minsizenode
-
- unlog=output_unit
- if(present(un))unlog=un
-
- time=0._real64
-
- neqns=size(ia)-1
-
- call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.true.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
-
- !Convert to ija
- !$ t1=omp_get_wtime()
- call converttoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm)
- !$ time(6)=omp_get_wtime()-t1
-
- call writetime(unlog,time,'INVERSION')
-
-end subroutine
-
-subroutine get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,lspainv,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
- integer(kind=int32),intent(in)::neqns
- integer(kind=int32),intent(inout)::mssn
- integer(kind=int32),intent(in)::ia(:)
- integer(kind=int32),intent(in)::ja(:)
- integer(kind=int32),intent(in)::xadj(:),adjncy(:)
- integer(kind=int32),intent(in)::perm(:)  !Ap(i,:)=A(perm(i),:)
- real(kind=wp),intent(inout)::a(:)
- real(kind=real64),intent(inout)::time(:)
- logical,intent(in)::lspainv
-
- integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:)
- real(kind=wp),allocatable,intent(out)::xspars(:),diag(:)
-  
-#if (_VERBOSE >2)
- integer(kind=int32)::i
-#endif
- integer(kind=int32)::nnode
- integer(kind=int32)::maxnode
- integer(kind=int32)::maxsub,flag,maxlnz
- integer(kind=int32)::rank
- integer(kind=int32),allocatable::inode(:)
-!$ real(kind=real64)::t1
-
- !symbolic factorization
- !$ t1=omp_get_wtime()
- call symbolicfact(neqns,ia(neqns+1)-1,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag)
- !$ time(1)=omp_get_wtime()-t1
- !$ t1=omp_get_wtime()
-#if (_VERBOSE >0)
- !$ write(*,'(1x,a,t30,a,g0)')'CRS Symb. fact.',': Elapsed time = ',time(1)
-#endif
-
- call computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm)
- !$ time(2)=omp_get_wtime()-t1
- !$ t1=omp_get_wtime()
-#if (_VERBOSE >0)
- !$ write(*,'(1x,a,t30,a,g0)')'CRS Compute xspars',': Elapsed time = ',time(2)
-#endif
-
- !super following Karin Meyer
- allocate(inode(neqns+1))  !to allow diagonal matrices (for which the number of super-nodes is equal to the number of equations
- call super_nodes(mssn,neqns,xlnz,xnzsub,nzsub,nnode,inode,maxnode)
- !$ time(3)=omp_get_wtime()-t1
- !$ t1=omp_get_wtime()
-#if (_VERBOSE >0)
- !$ write(*,'(1x,a,t30,a,g0)')'CRS Super nodes',': Elapsed time = ',time(3)
-#endif
-
- ! Cholesky factorization
- call super_gsfct(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode,rank)
- !$ time(4)=omp_get_wtime()-t1
- !$ t1=omp_get_wtime()
-#if (_VERBOSE >0)
- !$ write(*,'(1x,a,t30,a,g0)')'CRS Chol. fact.',': Elapsed time = ',time(4)
-#endif
-
- if(lspainv)then
-  ! Matrix inverse
-  call super_sparsinv(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode)
-  !$ time(5)=omp_get_wtime()-t1
-  !$ t1=omp_get_wtime()
-#if (_VERBOSE >0)
-  !$ write(*,'(1x,a,t30,a,g0)')'CRS Inversion',': Elapsed time = ',time(5)
-#endif
- endif
- 
-#if (_VERBOSE >0)
- write(*,'(/2x,a,i0)')'Flag symbolic factorization    : ',flag
- !write(*,'(2x,a,i0)')'Number of non-zero in the facor: ',maxlnz
- write(*,'(2x,a,i0)')'Number of super-nodes          : ',nnode
- write(*,'(2x,a,i0)')'Min size of super-nodes        : ',mssn
- write(*,'(2x,a,i0)')'Max size of super-nodes        : ',maxnode
- write(*,'(2x,a,i0/)')'Rank of the matrix             : ',rank
-#endif
-#if (_VERBOSE >2)
- do i=1,nnode
-  write(*,'(1x,4(a,i8))') 'node',i,' from row', inode(i+1)+1,'  to row', inode(i),' size ',inode(i)-inode(i+1)
- end do
-#endif
-
-end subroutine
-
-!PRIVATE
-subroutine symbolicfact(neqns,nnzeros,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag)
- integer(kind=int32),intent(in)::neqns,nnzeros
- integer(kind=int32),intent(in)::xadj(:),adjncy(:),perm(:)
- integer(kind=int32),intent(out)::maxlnz,maxsub,flag
- integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:)
-
- integer(kind=int32)::i,maxsubinit
- integer(kind=int32),allocatable::invp(:),rchlnk(:),mrglnk(:),marker(:)
-
- allocate(invp(size(perm)))
- do i=1,size(perm)
-  invp(perm(i))=i
- enddo
-
- allocate(xlnz(neqns+1),xnzsub(neqns+1))
- allocate(rchlnk(neqns),mrglnk(neqns),marker(neqns))
- xlnz=0
- xnzsub=0
- rchlnk=0
- mrglnk=0
- marker=0
- !maxsub=ia(neqns+1)-1
- maxsub=nnzeros
-
- do
-  flag=0
-  maxsubinit=maxsub
-  if(allocated(nzsub))deallocate(nzsub)
-  allocate(nzsub(maxsub))
-  nzsub=0
-  call smbfct(neqns,xadj,adjncy,perm,invp,xlnz,maxlnz,xnzsub,nzsub,maxsub,&
-              rchlnk,mrglnk,marker,flag&
-              )
-  if(maxsub.ne.maxsubinit)cycle
-  if(flag.eq.0)exit
-  maxsub=maxsub*2
- enddo
- deallocate(rchlnk,mrglnk,marker)
-
-end subroutine
-
 subroutine super_nodes(mssn, neqns, xlnz, xnzsub, ixsub, nnode, inode,maxnode)
  integer(kind=int32),intent(inout)::mssn
  integer(kind=int32),intent(in)::neqns
@@ -316,7 +48,7 @@ subroutine super_nodes(mssn, neqns, xlnz, xnzsub, ixsub, nnode, inode,maxnode)
  integer(kind=int32)::minnode
  real(kind=wp)::xx
 
- ! establish boundaries between diaggonal blocks 100% full
+ ! find boundaries between diagonal blocks 100% full
  ilast = neqns
  nnode = 0
  inode = 0
@@ -331,7 +63,7 @@ subroutine super_nodes(mssn, neqns, xlnz, xnzsub, ixsub, nnode, inode,maxnode)
        if(  kk > ilast ) exit
        n = n + 1
     end do
-    xx = dble( n ) / dble( ilast - i )
+    xx = real( n, kind=wp ) / real( ilast - i, kind=wp )
     if( xx < 1._wp .and. (ilast-i).ge.mssn ) then
      nnode = nnode +1
      minnode=min(minnode,ilast-i)
@@ -797,204 +529,6 @@ subroutine super_sparsinv(neqns,xlnz,xspars,xnzsub,ixsub,diag,nnode,inode)
 
 end subroutine
 
-pure subroutine computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm)
- integer(kind=int32),intent(in)::neqns,maxlnz
- integer(kind=int32),intent(in)::ia(:),ja(:)
- integer(kind=int32),intent(in)::perm(:)
- integer(kind=int32),intent(in)::xlnz(:),nzsub(:),xnzsub(:)
- real(kind=wp),intent(in)::a(:)
- real(kind=wp),intent(out),allocatable::xspars(:),diag(:)
-
- integer(kind=int32)::irow,iirow,icol,i,j,k
-
- allocate(xspars(maxlnz),diag(neqns))
- xspars=0._wp
- diag=0._wp
-
- do i=1,neqns
-  irow=perm(i)
-  diag(i)=a(ia(irow))
-  do k=xlnz(i),xlnz(i+1)-1
-   iirow=irow
-   icol=perm(nzsub(xnzsub(i)+k-xlnz(i)))
-   if(iirow.gt.icol)then
-    iirow=icol
-    icol=irow
-   endif
-   do j=ia(iirow)+1,ia(iirow+1)-1
-    if(ja(j).eq.icol)then
-     xspars(k)=a(j)
-     exit
-    endif
-   enddo
-  enddo
- enddo
-
-end subroutine
-
-pure subroutine converttoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm)
- integer(kind=int32),intent(in)::neqns
- integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:)
- integer(kind=int32),intent(in)::ia(:),ja(:),perm(:)
- real(kind=wp),intent(in)::xspars(:),diag(:)
- real(kind=wp),intent(inout):: a(:)
-
- integer(kind=int32)::irow,ksub,i,icol
- integer(kind=int32)::pirow,ppirow,picol,ip
-
- do irow = 1, neqns
-  pirow=perm(irow)
-  a(ia(pirow))=diag(irow)
-  ksub = xnzsub(irow)
-  do i = xlnz(irow), xlnz(irow+1)-1
-   ppirow=pirow
-   icol = ixsub(ksub)
-   picol=perm(icol)
-   ksub = ksub + 1
-   if(ppirow.gt.picol)then
-    ppirow=picol
-    picol=pirow
-   endif
-   intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1
-    if(ja(ip).eq.picol)then
-     a(ip)=xspars(i)
-     exit intloop
-    endif
-   enddo intloop
-  end do
- end do
-
-end subroutine 
-
-pure subroutine converttoija_noperm(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm)
- integer(kind=int32),intent(in)::neqns
- integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:)
- integer(kind=int32),intent(in)::perm(:)
- integer(kind=int32),intent(inout)::ia(:),ja(:)
- real(kind=wp),intent(in)::xspars(:),diag(:)
- real(kind=wp),intent(out):: a(:)
-
- integer(kind=int32)::irow,ksub,i,icol
- integer(kind=int32)::pirow,ppirow,picol,ip
- integer(kind=int32)::nel
- integer(kind=int32),allocatable::iperm(:)
- integer(kind=int32),allocatable::iia(:),jja(:)
-
- 
- !inefficient but it works
- 
- !1. Permute ia and ja
- allocate(iperm,source=perm)
- do i=1,neqns
-  iperm(perm(i))=i
- enddo
- nel=ia(neqns+1)-1
- allocate(iia(nel),jja(nel))
- 
- do irow=1,neqns
-  pirow=iperm(irow)
-  do i=ia(irow),ia(irow+1)-1
-   if(pirow.lt.iperm(ja(i)))then
-    iia(i)=pirow
-    jja(i)=iperm(ja(i))
-   else
-    iia(i)=iperm(ja(i))
-    jja(i)=pirow
-   endif
-  enddo
- enddo
-
- ia=0
- ia(1)=1
- do i=1,nel
-  ia(iia(i)+1)=ia(iia(i)+1)+1
- enddo
-
- ja=0
- do i=1,neqns
-  ia(i+1)=ia(i+1)+ia(i)
-  ja(ia(i))=i
- enddo
-
- iperm=1
- do i=1,nel
-  if(iia(i).ne.jja(i))then
-   ja(ia(iia(i))+iperm(iia(i)))=jja(i)
-   iperm(iia(i))=iperm(iia(i))+1
-  endif
- enddo
-
- !2. Replace a
- a=0._wp
- do irow = 1, neqns
-  pirow=irow
-  a(ia(pirow))=diag(irow)
-  ksub = xnzsub(irow)
-  do i = xlnz(irow), xlnz(irow+1)-1
-   ppirow=pirow
-   icol = ixsub(ksub)
-   picol=icol
-   ksub = ksub + 1
-   if(ppirow.gt.picol)then
-    ppirow=picol
-    picol=pirow
-   endif
-   intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1
-    if(ja(ip).eq.picol)then
-     a(ip)=xspars(i)
-     exit intloop
-    endif
-   enddo intloop
-  end do
- end do
-
-end subroutine 
-
-pure subroutine convertfactortoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a)
- integer(kind=int32),intent(in)::neqns
- integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:)
- integer(kind=int32),intent(inout)::ia(:)
- integer(kind=int32),intent(out),allocatable::ja(:)
- real(kind=wp),intent(in)::xspars(:),diag(:)
- real(kind=wp),intent(out),allocatable::a(:)
-
- integer(kind=int32)::j,irow,ksub,icol
-
- allocate(ja(size(xspars)+neqns), source=0)
- allocate(a(size(xspars)+neqns), source=0._wp)
-
- do irow=1,neqns
-  ia(irow)=xlnz(irow)+irow-1
-  ja(ia(irow))=irow
-  a(ia(irow))=diag(irow)
-  ksub=xnzsub(irow)
-  do j=xlnz(irow),xlnz(irow+1)-1
-   icol=ixsub(ksub)
-   ksub=ksub+1
-   ja(j+irow)=icol
-   a(j+irow)=xspars(j)
-  enddo
- enddo
- ia(neqns+1)=xlnz(neqns+1)+neqns
-
-end subroutine 
-
-subroutine writetime(unlog,time,a)
- integer(kind=int32)::unlog
- real(kind=real64),intent(in)::time(:)
- character(len=*),intent(in)::a
-
- write(unlog,'(/a)')' CRS MATRIX '//trim(a)
- !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Symbolic factorization',':',time(1),' s'
- !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Setup of tmp arrays',':',time(2),' s'
- !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Node determination',':',time(3),' s'
- !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Cholesky factorization',':',time(4),' s'
- !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Matrix inversion',':',time(5),' s'
- !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Conversion to CRS',':',time(6),' s'
- !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Total time',':',sum(time),' s'
-
-end subroutine
-
 pure subroutine split_inode(inode, nnode, jnode)
  integer(kind=int32), intent(inout) :: inode(:)
  integer(kind=int32), intent(inout) :: nnode
diff --git a/src/modsparse.f90 b/src/modsparse.f90
index 1592a92..e26279c 100644
--- a/src/modsparse.f90
+++ b/src/modsparse.f90
@@ -447,10 +447,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog
   private
   !> @brief Adds the value val to mat(row,col); e.g., call mat\%add(row,col,val)
   procedure,public::add=>add_crs
-#if (_SPAINV==1)
   !> @brief Computes and replaces the sparse matrix by the (complete) Cholesky factor
   procedure,public::chol=>getchol_crs
-#endif
   !> @brief Deallocates the sparse matrix and sets to default values
   procedure,public::destroy=>destroy_crs
   procedure::diag_vect_crs
@@ -460,10 +458,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog
   generic,public::diag=>diag_vect_crs,diag_mat_crs
   !> @brief Returns the value of mat(row,col); e.g., ...=mat\%get(row,col)
   procedure,public::get=>get_crs
-#if (_SPAINV==1)
   !> @brief Computes and replaces the sparse matrix by the (complete) LDLt (L is stored in the upper triangle and D in the diagonal)
   procedure,public::getldlt=>getldlt_crs
-#endif
   !> @brief Gets memory used
   procedure,public::getmem=>getmem_crs
   !> @brief Initializes the vectors ia,ja,and a from external vectors
@@ -476,10 +472,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog
   procedure,public::get_nzval=>get_nzval_crs
   !> @brief Get diagonal elements of an approximate inverse using Harville (1999)
   procedure,public::harville=>harville_crs
-#if (_SPAINV==1)
   !> @brief Computes and replaces the sparse matrix by an incomplete Cholesky factor
   procedure,public::ichol=>getichol_crs
-#endif
   !> @brief Iniates crssparse
   procedure,public::init=>constructor_sub_crs
   !> @brief Solver with a triangular factor (e.g., a Cholesky factor or an incomplete Cholesky factor)
@@ -522,10 +516,8 @@ module subroutine submatrix_index_coo(sparse,subsparse,indvector,sizeblock,unlog
   generic,public::solve=>solve_crs_vector,solve_crs_array
   !> @brief Sorts the elements in a ascending order within a row
   procedure,public::sort=>sort_crs
-#if (_SPAINV==1)
   !> @brief Computes and replaces by the sparse inverse
   procedure,public::spainv=>getspainv_crs
-#endif
   !> @brief Gets a submatrix from a sparse matrix
   procedure,public::submatrix=>submatrix_crs
   !> @brief Gets a dense submatrix from a sparse matrix
@@ -627,7 +619,6 @@ module function totalnumberofelements_crs(sparse) result(nel)
    class(crssparse),intent(in)::sparse
    integer(kind=int64)::nel
   end function
-#if (_SPAINV==1)
   !**GET (COMPLETE) CHOLESKY FACTOR
   module subroutine getchol_crs(sparse,minsizenode)
    class(crssparse),intent(inout)::sparse
@@ -638,7 +629,6 @@ module subroutine getldlt_crs(sparse,minsizenode)
    class(crssparse),intent(inout)::sparse
    integer(kind=int32),intent(in),optional::minsizenode
   end subroutine
-#endif
 #if (_METIS==1)
   !**GET ORDER
   module function getordering_crs(sparse&
@@ -650,7 +640,6 @@ module function getordering_crs(sparse&
    integer(kind=int32),allocatable::perm(:)
   end function
 #endif
-#if (_SPAINV==1)
   !**GET INCOMPLETE CHOLESKY FACTOR
   module subroutine getichol_crs(sparse,minsizenode)
    class(crssparse),intent(inout)::sparse
@@ -661,7 +650,6 @@ module subroutine getspainv_crs(sparse,minsizenode)
    class(crssparse),intent(inout)::sparse
    integer(kind=int32),intent(in),optional::minsizenode
   end subroutine
-#endif
 #if (_PARDISO==1)
   pure module function isdecomposed_crs(sparse) result(ll)
    class(crssparse),intent(in)::sparse
diff --git a/src/modsparse_crs.f90 b/src/modsparse_crs.f90
index d011f7c..8846b40 100644
--- a/src/modsparse_crs.f90
+++ b/src/modsparse_crs.f90
@@ -5,9 +5,7 @@
                           , mkl_scsrmm, mkl_dcsrmm &
                           , mkl_scsrtrsv, mkl_dcsrtrsv &
                           , mkl_scsrsymv, mkl_dcsrsymv
-#if (_SPAINV==1)
- use modspainv, only: get_chol, get_ichol, get_spainv
-#endif
+ use modsparse_inv, only: get_chol, get_ichol, get_spainv
 #if (_PARDISO==1)
  use modvariablepardiso, only: checkpardiso, pardiso_variable
 #endif
@@ -407,7 +405,6 @@ module function totalnumberofelements_crs(sparse) result(nel)
 
 end function
 
-#if (_SPAINV==1)
 !**GET (COMPLETE) CHOLESKY FACTOR
 module subroutine getchol_crs(sparse,minsizenode)
  class(crssparse),intent(inout)::sparse
@@ -517,7 +514,6 @@ module subroutine getldlt_crs(sparse,minsizenode)
 #endif
 
 end subroutine
-#endif
 
 #if (_METIS==1)
 !**GET ORDER
@@ -609,7 +605,6 @@ module function getordering_crs(sparse&
 end function
 #endif
 
-#if (_SPAINV==1)
 !**GET INCOMPLETE CHOLESKY FACTOR
 module subroutine getichol_crs(sparse,minsizenode)
  class(crssparse),intent(inout)::sparse
@@ -722,7 +717,6 @@ module subroutine getspainv_crs(sparse,minsizenode)
 #endif
 
 end subroutine
-#endif
 
 #if (_PARDISO==1)
 !**RESET PARDISO MEMORY
diff --git a/src/modsparse_inv.f90 b/src/modsparse_inv.f90
new file mode 100644
index 0000000..0cd1681
--- /dev/null
+++ b/src/modsparse_inv.f90
@@ -0,0 +1,489 @@
+!> Module containing subroutines for sparse inverses of symmetric positive definite matrices
+
+!> @todo Still raw and not very efficient
+
+module modsparse_inv
+#if (_DP==0)
+ use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real32
+#else
+ use iso_fortran_env,only:output_unit,int32,int64,real32,real64,wp=>real64
+#endif
+#if (_SPAINV==1)
+ use modspainv, only: super_nodes, super_gsfct, super_sparsinv
+#endif
+ !$ use omp_lib
+ implicit none
+ private
+ public::get_chol,get_ichol,get_spainv
+
+ integer(kind=int32),parameter::minsizesupernode=0  !values other than 0 (e.g., 256) may give troubles
+
+ interface get_chol
+  module procedure get_chol_crs
+ end interface
+
+ interface get_ichol
+  module procedure get_ichol_crs
+ end interface
+
+ interface get_spainv
+  module procedure get_spainv_crs
+ end interface
+
+ interface
+  subroutine smbfct(neqns,xadj,adjncy,perm,invp,&
+              xlnz,maxlnz,xnzsub,nzsub,maxsub,&
+              rchlnk,mrglnk,marker,flag&
+              )
+   integer::neqns
+   integer::maxsub,flag,maxlnz
+   integer::xadj(*),adjncy(*)
+   integer::perm(*),invp(*)
+   integer::xlnz(*),xnzsub(*),nzsub(*),rchlnk(*),mrglnk(*),marker(*)
+  end subroutine
+ end interface
+
+contains
+
+!PUBLIC
+!Should return the Cholesky factor in the permuted order
+subroutine get_ichol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un)
+ integer(kind=int32),intent(inout)::ia(:)
+ integer(kind=int32),intent(inout)::ja(:)
+ integer(kind=int32),intent(in)::xadj(:),adjncy(:)
+ integer(kind=int32),intent(in)::perm(:)  !Ap(i,:)=A(perm(i),:)
+ integer(kind=int32),intent(in),optional::minsizenode
+ integer(kind=int32),intent(in),optional::un
+ real(kind=wp),intent(inout)::a(:)
+ 
+ integer(kind=int32)::unlog,neqns
+ integer(kind=int32)::mssn
+ integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:)
+ real(kind=wp),allocatable::xspars(:),diag(:)
+ !$ real(kind=real64)::t1
+ real(kind=real64)::time(6)
+
+ mssn=minsizesupernode
+ if(present(minsizenode))mssn=minsizenode
+
+ unlog=output_unit
+ if(present(un))unlog=un
+
+ time=0._real64
+
+ neqns=size(ia)-1
+
+ call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
+
+ ! Cholesky factorization
+ !Convert to ija
+ !$ t1=omp_get_wtime()
+ call converttoija_noperm(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm)
+ !$ time(6)=omp_get_wtime()-t1
+
+ call writetime(unlog,time,'SPARSE CHOL FACT.')
+
+end subroutine
+
+subroutine get_chol_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un)
+ integer(kind=int32),intent(inout),allocatable::ia(:)
+ integer(kind=int32),intent(inout),allocatable::ja(:)
+ integer(kind=int32),intent(in)::xadj(:),adjncy(:)
+ integer(kind=int32),intent(in)::perm(:)  !Ap(i,:)=A(perm(i),:)
+ integer(kind=int32),intent(in),optional::minsizenode
+ integer(kind=int32),intent(in),optional::un
+ real(kind=wp),intent(inout),allocatable::a(:)
+ 
+ integer(kind=int32)::unlog,neqns
+ integer(kind=int32)::mssn
+ integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:)
+ real(kind=wp),allocatable::xspars(:),diag(:)
+ !$ real(kind=real64)::t1
+ real(kind=real64)::time(6)
+
+ mssn=minsizesupernode
+ if(present(minsizenode))mssn=minsizenode
+
+ unlog=output_unit
+ if(present(un))unlog=un
+
+ time=0._real64
+
+ neqns=size(ia)-1
+
+ call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.false.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
+
+ !Convert to ija
+ !$ t1=omp_get_wtime()
+ call convertfactortoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a)
+ !$ time(6)=omp_get_wtime()-t1
+
+ call writetime(unlog,time,'CHOL FACT.')
+
+end subroutine
+
+subroutine get_spainv_crs(ia,ja,a,xadj,adjncy,perm,minsizenode,un)
+ integer(kind=int32),intent(in)::ia(:)
+ integer(kind=int32),intent(in)::ja(:)
+ integer(kind=int32),intent(in)::xadj(:),adjncy(:)
+ integer(kind=int32),intent(inout)::perm(:)  !Ap(i,:)=A(perm(i),:)
+ integer(kind=int32),intent(in),optional::minsizenode
+ integer(kind=int32),intent(in),optional::un
+ real(kind=wp),intent(inout)::a(:)
+ 
+ integer(kind=int32)::unlog,neqns
+ integer(kind=int32)::mssn
+ integer(kind=int32),allocatable::xlnz(:),xnzsub(:),nzsub(:)
+ real(kind=wp),allocatable::xspars(:),diag(:)
+ !$ real(kind=real64)::t1
+ real(kind=real64)::time(6)
+
+ mssn=minsizesupernode
+ if(present(minsizenode))mssn=minsizenode
+
+ unlog=output_unit
+ if(present(un))unlog=un
+
+ time=0._real64
+
+ neqns=size(ia)-1
+
+ call get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,.true.,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
+
+ !Convert to ija
+ !$ t1=omp_get_wtime()
+ call converttoija(neqns,xlnz,xspars,xnzsub,nzsub,diag,ia,ja,a,perm)
+ !$ time(6)=omp_get_wtime()-t1
+
+ call writetime(unlog,time,'INVERSION')
+
+end subroutine
+
+subroutine get_ichol_spainv_crs(neqns,ia,ja,a,xadj,adjncy,perm,lspainv,xlnz,xspars,xnzsub,nzsub,diag,mssn,time)
+ integer(kind=int32),intent(in)::neqns
+ integer(kind=int32),intent(inout)::mssn
+ integer(kind=int32),intent(in)::ia(:)
+ integer(kind=int32),intent(in)::ja(:)
+ integer(kind=int32),intent(in)::xadj(:),adjncy(:)
+ integer(kind=int32),intent(in)::perm(:)  !Ap(i,:)=A(perm(i),:)
+ real(kind=wp),intent(inout)::a(:)
+ real(kind=real64),intent(inout)::time(:)
+ logical,intent(in)::lspainv
+
+ integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:)
+ real(kind=wp),allocatable,intent(out)::xspars(:),diag(:)
+  
+#if (_VERBOSE >2)
+ integer(kind=int32)::i
+#endif
+ integer(kind=int32)::nnode
+ integer(kind=int32)::maxnode
+ integer(kind=int32)::maxsub,flag,maxlnz
+ integer(kind=int32)::rank
+ integer(kind=int32),allocatable::inode(:)
+!$ real(kind=real64)::t1
+
+ !symbolic factorization
+ !$ t1=omp_get_wtime()
+ call symbolicfact(neqns,ia(neqns+1)-1,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag)
+ !$ time(1)=omp_get_wtime()-t1
+ !$ t1=omp_get_wtime()
+#if (_VERBOSE >0)
+ !$ write(*,'(1x,a,t30,a,g0)')'CRS Symb. fact.',': Elapsed time = ',time(1)
+#endif
+
+ call computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm)
+ !$ time(2)=omp_get_wtime()-t1
+ !$ t1=omp_get_wtime()
+#if (_VERBOSE >0)
+ !$ write(*,'(1x,a,t30,a,g0)')'CRS Compute xspars',': Elapsed time = ',time(2)
+#endif
+
+#if (_SPAINV==1)
+ allocate(inode(neqns+1))  !to allow diagonal matrices (for which the number of super-nodes is equal to the number of equations
+ call super_nodes(mssn,neqns,xlnz,xnzsub,nzsub,nnode,inode,maxnode)
+ !$ time(3)=omp_get_wtime()-t1
+ !$ t1=omp_get_wtime()
+#if (_VERBOSE >0)
+ !$ write(*,'(1x,a,t30,a,g0)')'CRS Super nodes',': Elapsed time = ',time(3)
+#endif
+
+ ! Cholesky factorization
+ call super_gsfct(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode,rank)
+ !$ time(4)=omp_get_wtime()-t1
+ !$ t1=omp_get_wtime()
+#if (_VERBOSE >0)
+ !$ write(*,'(1x,a,t30,a,g0)')'CRS Chol. fact.',': Elapsed time = ',time(4)
+#endif
+
+ if(lspainv)then
+  ! Matrix inverse
+  call super_sparsinv(neqns,xlnz,xspars,xnzsub,nzsub,diag,nnode,inode)
+  !$ time(5)=omp_get_wtime()-t1
+  !$ t1=omp_get_wtime()
+#if (_VERBOSE >0)
+  !$ write(*,'(1x,a,t30,a,g0)')'CRS Inversion',': Elapsed time = ',time(5)
+#endif
+ endif
+ 
+#if (_VERBOSE >0)
+ write(*,'(/2x,a,i0)')'Flag symbolic factorization    : ',flag
+ !write(*,'(2x,a,i0)')'Number of non-zero in the facor: ',maxlnz
+ write(*,'(2x,a,i0)')'Number of super-nodes          : ',nnode
+ write(*,'(2x,a,i0)')'Min size of super-nodes        : ',mssn
+ write(*,'(2x,a,i0)')'Max size of super-nodes        : ',maxnode
+ write(*,'(2x,a,i0/)')'Rank of the matrix             : ',rank
+#endif
+#if (_VERBOSE >2)
+ do i=1,nnode
+  write(*,'(1x,4(a,i8))') 'node',i,' from row', inode(i+1)+1,'  to row', inode(i),' size ',inode(i)-inode(i+1)
+ end do
+#endif
+
+#else
+ error stop 'SPAINV: not enabled'
+#endif
+
+end subroutine
+
+!PRIVATE
+subroutine symbolicfact(neqns,nnzeros,xadj,adjncy,perm,xlnz,maxlnz,xnzsub,nzsub,maxsub,flag)
+ integer(kind=int32),intent(in)::neqns,nnzeros
+ integer(kind=int32),intent(in)::xadj(:),adjncy(:),perm(:)
+ integer(kind=int32),intent(out)::maxlnz,maxsub,flag
+ integer(kind=int32),allocatable,intent(out)::xlnz(:),xnzsub(:),nzsub(:)
+
+ integer(kind=int32)::i,maxsubinit
+ integer(kind=int32),allocatable::invp(:),rchlnk(:),mrglnk(:),marker(:)
+
+ allocate(invp(size(perm)))
+ do i=1,size(perm)
+  invp(perm(i))=i
+ enddo
+
+ allocate(xlnz(neqns+1),xnzsub(neqns+1))
+ allocate(rchlnk(neqns),mrglnk(neqns),marker(neqns))
+ xlnz=0
+ xnzsub=0
+ rchlnk=0
+ mrglnk=0
+ marker=0
+ !maxsub=ia(neqns+1)-1
+ maxsub=nnzeros
+
+ do
+  flag=0
+  maxsubinit=maxsub
+  if(allocated(nzsub))deallocate(nzsub)
+  allocate(nzsub(maxsub))
+  nzsub=0
+  call smbfct(neqns,xadj,adjncy,perm,invp,xlnz,maxlnz,xnzsub,nzsub,maxsub,&
+              rchlnk,mrglnk,marker,flag&
+              )
+  if(maxsub.ne.maxsubinit)cycle
+  if(flag.eq.0)exit
+  maxsub=maxsub*2
+ enddo
+ deallocate(rchlnk,mrglnk,marker)
+
+end subroutine
+
+pure subroutine computexsparsdiag(neqns,ia,ja,a,xlnz,nzsub,xnzsub,maxlnz,xspars,diag,perm)
+ integer(kind=int32),intent(in)::neqns,maxlnz
+ integer(kind=int32),intent(in)::ia(:),ja(:)
+ integer(kind=int32),intent(in)::perm(:)
+ integer(kind=int32),intent(in)::xlnz(:),nzsub(:),xnzsub(:)
+ real(kind=wp),intent(in)::a(:)
+ real(kind=wp),intent(out),allocatable::xspars(:),diag(:)
+
+ integer(kind=int32)::irow,iirow,icol,i,j,k
+
+ allocate(xspars(maxlnz),diag(neqns))
+ xspars=0._wp
+ diag=0._wp
+
+ do i=1,neqns
+  irow=perm(i)
+  diag(i)=a(ia(irow))
+  do k=xlnz(i),xlnz(i+1)-1
+   iirow=irow
+   icol=perm(nzsub(xnzsub(i)+k-xlnz(i)))
+   if(iirow.gt.icol)then
+    iirow=icol
+    icol=irow
+   endif
+   do j=ia(iirow)+1,ia(iirow+1)-1
+    if(ja(j).eq.icol)then
+     xspars(k)=a(j)
+     exit
+    endif
+   enddo
+  enddo
+ enddo
+
+end subroutine
+
+pure subroutine converttoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm)
+ integer(kind=int32),intent(in)::neqns
+ integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:)
+ integer(kind=int32),intent(in)::ia(:),ja(:),perm(:)
+ real(kind=wp),intent(in)::xspars(:),diag(:)
+ real(kind=wp),intent(inout):: a(:)
+
+ integer(kind=int32)::irow,ksub,i,icol
+ integer(kind=int32)::pirow,ppirow,picol,ip
+
+ do irow = 1, neqns
+  pirow=perm(irow)
+  a(ia(pirow))=diag(irow)
+  ksub = xnzsub(irow)
+  do i = xlnz(irow), xlnz(irow+1)-1
+   ppirow=pirow
+   icol = ixsub(ksub)
+   picol=perm(icol)
+   ksub = ksub + 1
+   if(ppirow.gt.picol)then
+    ppirow=picol
+    picol=pirow
+   endif
+   intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1
+    if(ja(ip).eq.picol)then
+     a(ip)=xspars(i)
+     exit intloop
+    endif
+   enddo intloop
+  end do
+ end do
+
+end subroutine 
+
+pure subroutine converttoija_noperm(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a,perm)
+ integer(kind=int32),intent(in)::neqns
+ integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:)
+ integer(kind=int32),intent(in)::perm(:)
+ integer(kind=int32),intent(inout)::ia(:),ja(:)
+ real(kind=wp),intent(in)::xspars(:),diag(:)
+ real(kind=wp),intent(out):: a(:)
+
+ integer(kind=int32)::irow,ksub,i,icol
+ integer(kind=int32)::pirow,ppirow,picol,ip
+ integer(kind=int32)::nel
+ integer(kind=int32),allocatable::iperm(:)
+ integer(kind=int32),allocatable::iia(:),jja(:)
+
+ 
+ !inefficient but it works
+ 
+ !1. Permute ia and ja
+ allocate(iperm,source=perm)
+ do i=1,neqns
+  iperm(perm(i))=i
+ enddo
+ nel=ia(neqns+1)-1
+ allocate(iia(nel),jja(nel))
+ 
+ do irow=1,neqns
+  pirow=iperm(irow)
+  do i=ia(irow),ia(irow+1)-1
+   if(pirow.lt.iperm(ja(i)))then
+    iia(i)=pirow
+    jja(i)=iperm(ja(i))
+   else
+    iia(i)=iperm(ja(i))
+    jja(i)=pirow
+   endif
+  enddo
+ enddo
+
+ ia=0
+ ia(1)=1
+ do i=1,nel
+  ia(iia(i)+1)=ia(iia(i)+1)+1
+ enddo
+
+ ja=0
+ do i=1,neqns
+  ia(i+1)=ia(i+1)+ia(i)
+  ja(ia(i))=i
+ enddo
+
+ iperm=1
+ do i=1,nel
+  if(iia(i).ne.jja(i))then
+   ja(ia(iia(i))+iperm(iia(i)))=jja(i)
+   iperm(iia(i))=iperm(iia(i))+1
+  endif
+ enddo
+
+ !2. Replace a
+ a=0._wp
+ do irow = 1, neqns
+  pirow=irow
+  a(ia(pirow))=diag(irow)
+  ksub = xnzsub(irow)
+  do i = xlnz(irow), xlnz(irow+1)-1
+   ppirow=pirow
+   icol = ixsub(ksub)
+   picol=icol
+   ksub = ksub + 1
+   if(ppirow.gt.picol)then
+    ppirow=picol
+    picol=pirow
+   endif
+   intloop: do ip=ia(ppirow)+1,ia(ppirow+1)-1
+    if(ja(ip).eq.picol)then
+     a(ip)=xspars(i)
+     exit intloop
+    endif
+   enddo intloop
+  end do
+ end do
+
+end subroutine 
+
+pure subroutine convertfactortoija(neqns,xlnz,xspars,xnzsub,ixsub,diag,ia,ja,a)
+ integer(kind=int32),intent(in)::neqns
+ integer(kind=int32),intent(in)::ixsub(:),xlnz(:),xnzsub(:)
+ integer(kind=int32),intent(inout)::ia(:)
+ integer(kind=int32),intent(out),allocatable::ja(:)
+ real(kind=wp),intent(in)::xspars(:),diag(:)
+ real(kind=wp),intent(out),allocatable::a(:)
+
+ integer(kind=int32)::j,irow,ksub,icol
+
+ allocate(ja(size(xspars)+neqns), source=0)
+ allocate(a(size(xspars)+neqns), source=0._wp)
+
+ do irow=1,neqns
+  ia(irow)=xlnz(irow)+irow-1
+  ja(ia(irow))=irow
+  a(ia(irow))=diag(irow)
+  ksub=xnzsub(irow)
+  do j=xlnz(irow),xlnz(irow+1)-1
+   icol=ixsub(ksub)
+   ksub=ksub+1
+   ja(j+irow)=icol
+   a(j+irow)=xspars(j)
+  enddo
+ enddo
+ ia(neqns+1)=xlnz(neqns+1)+neqns
+
+end subroutine 
+
+subroutine writetime(unlog,time,a)
+ integer(kind=int32)::unlog
+ real(kind=real64),intent(in)::time(:)
+ character(len=*),intent(in)::a
+
+ write(unlog,'(/a)')' CRS MATRIX '//trim(a)
+ !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Symbolic factorization',':',time(1),' s'
+ !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Setup of tmp arrays',':',time(2),' s'
+ !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Node determination',':',time(3),' s'
+ !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Cholesky factorization',':',time(4),' s'
+ !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Matrix inversion',':',time(5),' s'
+ !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Conversion to CRS',':',time(6),' s'
+ !$ write(unlog,'(2x,a,t31,a,t33,f0.5,a)')'Total time',':',sum(time),' s'
+
+end subroutine
+
+end module