!----------------------------------------------------------------------- ! Copyright (C) 2010-2015 Henry Lambert, Feliciano Giustino ! This file is distributed under the terms of the GNU General Public ! License. See the file `LICENSE' in the root directory of the ! present distribution, or http://www.gnu.org/copyleft.gpl.txt . !----------------------------------------------------------------------- SUBROUTINE cbicgstabl(h_psi, cg_psi, e, d0psi, dpsi, h_diag, & ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd, npol, cw, lmresloc, tprec) ! !----------------------------------------------------------------------- ! ! Iterative solution of the linear system: ! ! ( h - e + w + i * eta ) * x = b ! ( h - cw + i * eta ) * G(G,G') = -\delta(G,G') ! ! where h is a complex hermitian matrix, e, w, and eta are ! real scalar, x and b are complex vectors ! USE kinds, ONLY: DP USE mp_global, ONLY: intra_pool_comm USE mp, ONLY: mp_sum USE control_gw, ONLY: maxter_green USE units_gw, ONLY: iunresid, lrresid, iunalphabeta, lralphabeta USE mp_global, ONLY: inter_pool_comm, intra_pool_comm, mp_global_end, mpime, & nproc_pool, nproc, me_pool, my_pool_id, npool USE mp, ONLY : mp_barrier, mp_bcast, mp_sum implicit none !tprec true or false conditioning? logical :: tprec integer :: ndmx, & ! input: the maximum dimension of the vectors ndim, & ! input: the actual dimension of the vectors kter, & ! output: counter on iterations nbnd, & ! input: the number of bands npol, & ! input: number of components of the wavefunctions ik, & ! input: the k point niters, & ! number of iterations for this BiCG min nrec, & ! for composite rec numbers lmresloc ! set number of loops over minimal residuals... real(DP) :: & anorm, & ! output: the norm of the error in the solution ethr, & ! input: the required precision h_diag(ndmx,nbnd) ! input: an estimate of ( H - \epsilon ) COMPLEX(DP) :: alphabeta(2), beta_old complex(DP) :: & dpsi (ndmx*npol, nbnd), & ! output: the solution of the linear syst d0psi (ndmx*npol, nbnd) ! input: the known term !GMRES part complex(DP) :: tau(lmresloc + 1,lmresloc + 1) complex(DP) :: sigma(lmresloc + 1) complex(DP) :: gmgamma(lmresloc + 1) complex(DP) :: gmgammapp(lmresloc + 1) complex(DP) :: gmgammap(lmresloc + 1) complex(DP) :: gschmidt logical :: conv_root ! output: if true the root is converged external h_psi ! input: the routine computing h_psi external cg_psi ! input: the routine computing cg_psi !here the local variables !integer, parameter :: maxter = 200 !integer, parameter :: maxter = 600 !the maximum number of iterations integer :: iter, ibnd, lbnd, iterj, iteri !counters on iteration, bands integer , allocatable :: conv (:) !if 1 the root is converged !HL NB GWTC: g t h hold -> SGW: r q p pold complex(DP), allocatable :: g (:,:), t (:,:), h (:,:), hold (:,:) !the gradient of psi !the preconditioned gradient !the delta gradient !the conjugate gradient !work space COMPLEX(DP) :: cw !HL need to introduce gt tt ht htold for BICON !also gp grp for preconditioned systems complex(DP), allocatable :: gt (:,:), hn (:,:) complex(DP), allocatable :: gp (:,:), gtp (:,:) complex(DP) :: dcgamma, dclambda, alpha, beta, omega !the ratio between rho !step length complex(DP), external :: zdotc !HL (eigenvalue + iw) complex(DP) :: e(nbnd) complex(DP) :: rho, rhoold !min residual vector !complex(DP), allocatable :: s (:,:) !the scalar product !why is this real?? !real(DP), allocatable :: a(:), c(:) complex(DP), allocatable :: a(:), c(:) !the residue !auxiliary for h_diag real(DP) :: kter_eff !account the number of iterations with b !coefficient of quadratic form call start_clock ('cgsolve') allocate ( g(ndmx*npol,lmresloc+1), t(ndmx*npol,nbnd), h(ndmx*npol,nbnd), & hold(ndmx*npol, nbnd) ) allocate ( gt(ndmx*npol,nbnd), hn(ndmx*npol,nbnd)) allocate ( gp(ndmx*npol,nbnd), gtp(ndmx*npol,nbnd)) allocate (a(nbnd), c(nbnd)) allocate (conv ( nbnd)) kter_eff = 0.d0 do ibnd = 1, nbnd conv (ibnd) = 0 enddo conv_root = .false. g = dcmplx(0.d0,0.d0) t = dcmplx(0.d0,0.d0) h = dcmplx(0.d0,0.d0) hold = dcmplx(0.d0,0.d0) gt = dcmplx(0.d0,0.d0) hn = dcmplx(0.d0,0.d0) gp(:,:) = dcmplx(0.d0,0.0d0) gtp(:,:) = dcmplx(0.d0,0.0d0) rho = dcmplx(1.0d0,0.0d0) rhoold = dcmplx(1.0d0,0.0d0) omega = dcmplx(1.0d0,0.0d0) alpha = dcmplx(0.0d0,0.0d0) sigma(:) = dcmplx(1.0d0, 0.0d0) do iter = 1, maxter_green ! r = b - Ax ! rt = conjg ( r ) !!!!!!!!!!!!!!!!!!cBICG PART!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! iterj allows for the bicgstab(2,3,4,...) generalizations. ! a choice of lmresloc = 1 corresponds to the standard bicgstab. ! H. A. Van Der Corst Siam J. Sci. Stat. Comput. ! Vol. 13, No. 2, pp. 631-644 if (iter .eq. 1) then !initialize iter 0 stuff !r = b - A* x !rt = conjg (r) !call h_psi (ndim, dpsi, g, e, cw, ik, nbnd) do ibnd = 1, nbnd !initial residual should be r = b ! I/O for multishift. ! call davcio (d0psi(:,1), lrresid, iunresid, iter, +1) call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd), 1) call zscal (ndim, (-1.0d0, 0.0d0), g(1,ibnd), 1) ! copy r -> u, i.e. u = r_{0} ! call zcopy (ndim, g (1, ibnd), 1, h (1, ibnd), 1) ! set \tilde{r} = r^{*} ! gt(:,ibnd) = conjg ( g(:,ibnd) ) gt(:,ibnd) = g(:,ibnd) ! for preconditioning we solve the system MAx = Mb: ! if(tprec) call cg_psi(ndmx, ndim, 1, g(1,ibnd), h_diag(1,ibnd) ) ! if(tprec) call cg_psi(ndmx, ndim, 1, gt(1,ibnd), h_diag(1,ibnd) ) enddo endif rhoold = -omega*rhoold do iterj = 1, lmresloc do ibnd = 1, nbnd rho = ZDOTC (ndim, gt(1,ibnd), 1, g(1,ibnd), 1) beta = alpha*(rho/rhoold) rhoold = rho do iteri = 1, iterj !\hat{u}_{i} = \hat{r}_{i} - \beta \hat{u}_{i} hold(:,:) = dcmplx(0.0d0, 0.d0) call ZCOPY (ndmx*npol, g(1, iteri), 1, hold (1, iteri), 1) call ZAXPY (ndmx*npol, -beta, h(1,iteri), 1, hold(1,iteri), 1) call ZCOPY (ndmx*npol, hold(1, iteri), 1, h (1, iteri), 1) enddo lbnd = nbnd kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd) enddo conv_root = .true. do ibnd = 1, nbnd anorm = sqrt ( abs ( ZDOTC (ndim, g(1,ibnd), 1, g(1,ibnd), 1) ) ) if (anorm.lt.ethr) conv (ibnd) = 1 conv_root = conv_root.and.(conv (ibnd).eq.1) enddo if (conv_root) goto 100 ! HLstab ! t = u_{j+1} = A u_{j} do ibnd = 1, nbnd call h_psi (ndim, h(:,ibnd), hn(:,ibnd), e(1), cw, ik, nbnd) if(tprec) call cg_psi(ndmx, ndim, 1, hn(1,ibnd), h_diag(1,ibnd)) enddo lbnd=0 do ibnd = 1, nbnd if (conv (ibnd) .eq.0) then lbnd = ibnd ! alpha = <\tilde{r}|M^{-1}r>/<\tilde{u}|A{u}> ! the denominator is stored for subsequent use in beta ! HLSTAB in sleijpen notation ! gamma = (u_{j+1}, r_{0}) = (t,gp) a(ibnd) = rho c(ibnd) = ZDOTC (ndim, gt(1,ibnd), 1, hn(1,ibnd), 1) alpha = a(ibnd) / c(ibnd) ! HLSTAB do i = 0; iterj: ! ri = ri - alpha*hn ! end do iteri = 1, iterj call ZAXPY (ndmx*npol, -alpha, hn(1,ibnd), 1, g(1,ibnd), 1) enddo ! FOR MINIMAL RESIDUAL SCHEMA WE NEED TO UPDATE THE RESIDUALS AND THE SOLUTION VECTORS here ! but we need to hold off on the solution vectors u0 ! Also we introduce the min res part: ! t = r_{j+1} = A r_{j} ! t = Hg = Ar i.e. the min resid. part call h_psi (ndim, g(:,iterj), t(:,iterj), e(1), cw, ik, nbnd) if(tprec) call cg_psi(ndmx, ndim, 1, t(1,ibnd), h_diag(1,ibnd)) !MA r !x_{0} = x_{0} + alpha * u_{0} call ZAXPY (ndmx*npol, alpha, h(1,ibnd), 1, dpsi (1,ibnd), 1) endif enddo!do ibnd enddo!iterj !!!!!!!!!!!!!!!!!!!Min Residual Part!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! do ibnd = 1, nbnd do iterj = 2, lmresloc + 1 if (lmresloc.gt.1) then do iteri = 2, iterj tau(iteri, iterj) = (1.0d0/sigma(iteri))*ZDOTC(ndim, g(1,iterj), g(1,iteri), 1) !orthogonalizes each residual: call ZAXPY (ndmx*npol, -tau(iteri, iterj), g(1,iteri), 1, g(1,iterj), 1) enddo endif !should correspond to van der vorst's:sleipjen:SGW !w=(t,s)(t,t):(\sigma_{1} = (\r_{1},\r_{1})), \gammap=(1/\sigma_{j})(\r_{0}, \r_{j}): sigma(iterj) = ZDOTC(ndmx*npol, t(1,ibnd), 1, t(1,ibnd), 1) gmgammap(iterj) = (1.0d0/sigma(iterj)) * ZDOTC(ndim, t(1,ibnd), 1, g(1,ibnd), 1) enddo enddo !w = (t,s)(t,t) = (\sigma_{j} = (\r_{j}, \r_{j})), \gammap = (1/\sigma_{j})(\r_{0}, \r_{0}) gmgamma(lmresloc+1) = gmgammap(lmresloc+1) omega = gmgamma(lmresloc+1) if (lmresloc.gt.1) then do iterj = lmresloc+1, 2 gschmidt = dcmplx(0.0d0, 0.0d0) do iteri = iterj + 1, lmresloc+1 gschmidt = gschmidt + tau(iterj, iteri)*gmgamma(iteri) enddo gmgamma(iterj) = gmgammap(iterj) - gschmidt enddo endif if(lmresloc.gt.1) then do iterj = 1, lmresloc-1 do iteri = iterj + 1, lmresloc-1 gschmidt = gschmidt + tau(iterj, iteri)*gmgamma(iteri + 1) enddo gmgammapp(iterj) = gmgamma(iterj+1) + gschmidt enddo endif !The second update phase: do ibnd = 1, nbnd ! x = x + gmgamma(1) * \hat{r}_{0} call ZAXPY (ndim, (gmgamma(lmresloc+1)), g(1,ibnd), 1, dpsi(1,ibnd), 1) ! \hat{u}_{0} = \hat{u}_{0} + beta * u_old call ZAXPY (ndim, -(gmgamma(lmresloc+1)), hn(1,ibnd), 1, h(1,ibnd), 1) ! \hat{r}_{0} = \hat{r}_{0} - gammap_{l}*r_{lmresloc} call ZAXPY (ndim, -(gmgammap(lmresloc+1)), t(1,ibnd), 1, g(1,ibnd), 1) !and again here we get to a point and then stop progressing... !call ZAXPY (ndmx*npol, (gmgamma(lmresloc+1)), hn(1,ibnd), 1, h(1,ibnd), 1) if(lmresloc.gt.1) then do iterj = 2, lmresloc ! \hat{u_0} = \hat{u_0} + gmgamma(lmresloc)*u_{j} call ZAXPY (ndmx*npol, -(gmgamma(iterj)), hn (1,iterj), 1, h (1,iterj), 1) ! x_{0} = x_{0} + gmgamma''_{j} * r_{j} call ZAXPY (ndmx*npol, gmgammapp(iterj), g(1,iterj), 1, dpsi (1,iterj), 1) ! r_{0} = r_{0} - gamma'_{j}*r_{j} call ZAXPY (ndmx*npol, -(gmgammap(iterj)), t(1,iterj), 1, g (1,ibnd), 1) enddo endif enddo enddo!maxter 100 continue niters = iter kter = kter_eff deallocate (conv) deallocate (a,c) deallocate (g, t, h, hold) deallocate (gt, hn) deallocate (gtp, gp) call stop_clock ('cgsolve') return END SUBROUTINE cbicgstabl