SUBROUTINE cbcg_solve_fix(h_psi, cg_psi, e, d0psi, dpsi, h_diag, & ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd, npol, cw, tprec) ! !----------------------------------------------------------------------- ! ! Iterative solution of the linear system: ! ! ( h - e + w + i * eta ) * x = b ! ! where h is a complex hermitian matrix, e, w, and eta are ! real scalar, x and b are complex vectors ! USE kinds, ONLY : DP USE mp_global, ONLY: intra_pool_comm USE mp, ONLY: mp_sum implicit none ! first I/O variables logical :: tprec integer :: ndmx, & ! input: the maximum dimension of the vectors ndim, & ! input: the actual dimension of the vectors kter, & ! output: counter on iterations nbnd, & ! input: the number of bands npol, & ! input: number of components of the wavefunctions ik ! input: the k point real(DP) :: & anorm, & ! output: the norm of the error in the solution h_diag(ndmx*npol,nbnd), & ! input: an estimate of ( H - \epsilon ) ethr ! input: the required precision complex(DP) :: & dpsi (ndmx*npol, nbnd), & ! output: the solution of the linear syst d0psi (ndmx*npol, nbnd) ! input: the known term logical :: conv_root ! output: if true the root is converged external h_psi ! input: the routine computing h_psi external cg_psi ! input: the routine computing cg_psi ! ! here the local variables ! !HL upping iterations to get convergence with green_linsys? integer, parameter :: maxter = 200 !integer, parameter :: maxter = 600 ! the maximum number of iterations integer :: iter, ibnd, lbnd ! counters on iteration, bands integer , allocatable :: conv (:) ! if 1 the root is converged !HL NB GWTC: g t h hold -> SGW: r q p pold complex(DP), allocatable :: g (:,:), t (:,:), h (:,:), hold (:,:) ! the gradient of psi ! the preconditioned gradient ! the delta gradient ! the conjugate gradient ! work space COMPLEX(DP) :: cw !HL need to introduce gt tt ht htold for BICON ! also gp grp for preconditioned systems complex(DP), allocatable :: gt (:,:), tt (:,:), ht (:,:), htold (:,:) complex(DP), allocatable :: gp (:,:), gtp (:,:) complex(DP) :: dcgamma, dclambda, alpha, beta ! the ratio between rho ! step length complex(DP), external :: zdotc !HL (eigenvalue + iw) complex(DP) :: e(nbnd) ! the scalar product real(DP), allocatable :: rho (:), rhoold (:), a(:), c(:), astar(:), cstar(:) ! the residue ! auxiliary for h_diag real(DP) :: kter_eff ! account the number of iterations with b ! coefficient of quadratic form ! !HL call start_clock ('cgsolve') ! call start_clock ('cbcgsolve') allocate ( g(ndmx*npol,nbnd), t(ndmx*npol,nbnd), h(ndmx*npol,nbnd), & hold(ndmx*npol ,nbnd) ) allocate ( gt(ndmx*npol,nbnd), tt(ndmx*npol,nbnd), ht(ndmx*npol,nbnd), & htold(ndmx*npol, nbnd) ) allocate ( gp(ndmx*npol,nbnd), gtp(ndmx*npol,nbnd)) allocate (a(nbnd), c(nbnd)) allocate (conv ( nbnd)) allocate (rho(nbnd),rhoold(nbnd)) ! WRITE(6,*) g,t,h,hold ! Initialize kter_eff = 0.d0 do ibnd = 1, nbnd conv (ibnd) = 0 enddo conv_root = .false. g=(0.d0,0.d0) t=(0.d0,0.d0) h=(0.d0,0.d0) hold=(0.d0,0.d0) gt = (0.d0,0.d0) tt = (0.d0,0.d0) ht = (0.d0,0.d0) htold = (0.d0,0.d0) gp(:,:) = (0.d0, 0.0d0) gtp(:,:) = (0.d0, 0.0d0) !HL PH.x does explicit do loop: !I'd prefer to mimic SGW approach here, but Quesspro Guys like there goto etc !and I want to stick with there convergence parameters/mindset etc. do iter = 1, maxter ! kter = kter + 1 ! g = (-PcDv\Psi) - (H \Delta\Psi) ! gt = conjg( g) ! r = b - Ax ! rt = conjg ( r ) ! write(6, '("cBiCG")') if (iter .eq. 1) then !r = b - A* x !rt = conjg (r) call h_psi (ndim, dpsi, g, e, cw, ik, nbnd) !call h_psi (ndim, dpsi, g, e, ik, nbnd) do ibnd = 1, nbnd call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd), 1) call zscal (ndim, (-1.0d0, 0.0d0), g(1,ibnd), 1) gt(:,ibnd) = conjg ( g(:,ibnd) ) ! p = inv(M) * r ! pt = conjg ( p ) call zcopy (ndmx*npol, g (1, ibnd), 1, h (1, ibnd), 1) if(tprec) call cg_psi(ndmx, ndim, 1, h(1,ibnd), h_diag(1,ibnd) ) ht(:,ibnd) = conjg( h(:,ibnd) ) enddo IF (npol==2) THEN do ibnd = 1, nbnd call zaxpy (ndim, (-1.d0,0.d0), d0psi(ndmx+1,ibnd), 1, & g(ndmx+1,ibnd), 1) gt(:,ibnd) = conjg ( g(:,ibnd) ) enddo END IF endif ! I've moved: compute preconditioned residual vector and convergence check ! into the first iteration loop. Like in SGW. ! p = inv(M) * r ! pt = conjg ( p ) ! lbnd = 0 ! do ibnd = 1, nbnd ! if (conv (ibnd) .eq.0) then ! lbnd = lbnd+1 ! call zcopy (ndmx*npol, g (1, ibnd), 1, h (1, ibnd), 1) ! call cg_psi(ndmx, ndim, 1, h(1,ibnd), h_diag(1,ibnd) ) ! ht(:, ibnd) = conjg (h(:,ibnd)) ! HL ! not too sure what this rho is up to ! rho(lbnd) = zdotc (ndmx*npol, h(1,ibnd), 1, g(1,ibnd), 1) ! endif ! enddo lbnd = nbnd kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd) #ifdef __PARA !HL rho call mp_sum( rho(1:lbnd) , intra_pool_comm ) #endif ! do ibnd = nbnd, 1, -1 ! write(6,*) ibnd, anorm ! if (conv(ibnd).eq.0) then !HL rho(ibnd)=rho(lbnd) !HL lbnd = lbnd -1 !HL anorm = sqrt (rho (ibnd) ) !HL switching convergence test so that its the same as in SGW ! anorm = sqrt ( abs ( ZDOTC (ndim, g(1,ibnd), 1, g(1,ibnd), 1) ) ) ! if (anorm.lt.ethr) conv (ibnd) = 1 ! endif ! enddo ! ! "get out if all bands are converged." conv_root = .true. do ibnd = 1, nbnd anorm = sqrt ( abs ( ZDOTC (ndim, g(1,ibnd), 1, g(1,ibnd), 1) ) ) !write(6,*) anorm, ibnd, ethr if (anorm.lt.ethr) conv (ibnd) = 1 conv_root = conv_root.and.(conv (ibnd).eq.1) enddo if (conv_root) goto 100 ! " compute the step direction h. Conjugate it to previous step" ! HL actually ... We do the conjugation steps with h a leetle bit differently. ! lbnd = 0 ! do ibnd = 1, nbnd ! if (conv (ibnd) .eq.0) then ! change sign to h ! call dscal (2 * ndmx * npol, - 1.d0, h (1, ibnd), 1) ! if (iter.ne.1) then ! dcgamma = rho (ibnd) / rhoold (ibnd) ! call zaxpy (ndmx*npol, dcgamma, hold (1, ibnd), 1, h (1, ibnd), 1) ! endif ! here hold is used as auxiliary vector in order to efficiently compute t = A*h ! it is later set to the current (becoming old) value of h ! lbnd = lbnd+1 ! call zcopy (ndmx*npol, h (1, ibnd), 1, hold (1, lbnd), 1) ! call zcopy (ndmx*npol, ht (1, ibnd), 1, htold (1, lbnd), 1) ! eu (lbnd) = e (ibnd) ! endif ! enddo ! compute t = A*h ! SGW: Here we calculate q = A * p and qt = A^\dagger * pt ! GWTC: Here we calculate t = A * h and tt = A^\dagger * ht !****************** THIS IS THE MOST EXPENSIVE PART**********************! ! write(6,*) h(:,:) call h_psi (ndim, h, t, e(1), cw, ik, nbnd) call h_psi (ndim, ht, tt, e(1), conjg(cw), ik, nbnd) ! write(6,'("Search Directions")') ! write(6,*) t(:,:) ! stop ! call h_psi (ndim, h, t, e(1), ik, nbnd) ! call h_psi (ndim, ht, tt, e(1), ik, nbnd) ! ! compute the coefficients a and c for the line minimization ! compute step length lambda ! lbnd=0 ! do ibnd = 1, nbnd ! if (conv (ibnd) .eq.0) then ! lbnd=lbnd+1 ! lbnd = ibnd ! old regular CG stuff ! HL a(lbnd) = zdotc (ndmx*npol, h(1,ibnd), 1, g(1,ibnd), 1) ! HL c(lbnd) = zdotc (ndmx*npol, h(1,ibnd), 1, t(1,lbnd), 1) ! call zcopy(ndmx*npol, g(1,ibnd), 1, gp(1,ibnd), 1) ! call cg_psi(ndmx, ndim,1, gp(1,ibnd), h_diag(1,ibnd)) ! end if ! end do ! #ifdef __PARA !HL call mp_sum( a(1:lbnd), intra_pool_comm ) ! call mp_sum( c(1:lbnd), intra_pool_comm ) #endif lbnd=0 do ibnd = 1, nbnd if (conv (ibnd) .eq.0) then !HL packing? lbnd=lbnd+1 lbnd = ibnd !HL dclambda is the PH step size I want alpha... !HL dclambda = CMPLX( - a(lbnd) / c(lbnd), 0.d0,kind=DP) ! ! alpha = / ! [ the denominator is stored for subsequent use in beta ] ! call ZCOPY (ndmx*npol, g (1, ibnd), 1, gp (1, ibnd), 1) if (tprec) call cg_psi (ndmx, ndim, 1, gp(1,ibnd), h_diag(1,ibnd) ) a(ibnd) = ZDOTC (ndim, gt(1,ibnd), 1, gp(1,ibnd), 1) c(ibnd) = ZDOTC (ndim, ht(1,ibnd), 1, t (1,lbnd), 1) alpha = a(ibnd) / c(ibnd) ! ! x = x + alpha * p ! r = r - alpha * q ! rt = rt - conjg(alpha) * qt ! call ZAXPY (ndmx*npol, alpha, h (1,ibnd), 1, dpsi (1,ibnd), 1) call ZAXPY (ndmx*npol, -alpha, t (1,lbnd), 1, g (1,ibnd), 1) call ZAXPY (ndmx*npol, -conjg(alpha), tt (1,ibnd), 1, gt (1,ibnd), 1) ! rp = inv(M) * r ! rtp = inv(M) * rt call ZCOPY (ndmx*npol, g (1, ibnd), 1, gp (1, ibnd), 1) call ZCOPY (ndmx*npol, gt (1, ibnd), 1, gtp (1, ibnd), 1) if (tprec) call cg_psi (ndmx, ndmx*npol, 1, gp (1,ibnd), h_diag(1,ibnd) ) if (tprec) call cg_psi (ndmx, ndmx*npol, 1, gtp (1,ibnd), h_diag(1,ibnd) ) ! ! beta = - / ! a(ibnd) = ZDOTC (ndmx*npol, tt(1,ibnd), 1, gp(1,ibnd), 1) beta = - a(ibnd) / c(ibnd) ! ! pold = p ! ptold = pt ! call ZCOPY (ndmx*npol, h (1, ibnd), 1, hold (1, ibnd), 1) call ZCOPY (ndmx*npol, ht (1, ibnd), 1, htold (1, ibnd), 1) ! ! p = rp + beta * pold ! pt = rtp + conjg(beta) * ptold ! call ZCOPY (ndmx*npol, gp (1, ibnd), 1, h (1, ibnd), 1) call ZCOPY (ndmx*npol, gtp (1, ibnd), 1, ht (1, ibnd), 1) call ZAXPY (ndmx*npol, beta, hold (1,ibnd), 1, h (1,ibnd), 1) call ZAXPY (ndmx*npol, conjg(beta), htold (1,ibnd), 1, ht(1,ibnd), 1) ! write(6,*)h, ht ! move to new position !HL call zaxpy (ndmx*npol, dclambda, h(1,ibnd), 1, dpsi(1,ibnd), 1) ! call zaxpy (ndmx*npol, alpha, h(1,ibnd), 1, dpsi(1,ibnd), 1) ! update to get the gradient ! g=g+lam ! call zaxpy (ndmx*npol, dclambda, t(1,lbnd), 1, g(1,ibnd), 1) ! save current (now old) h and rho for later use ! call zcopy (ndmx*npol, h(1,ibnd), 1, hold(1,ibnd), 1) ! rhoold (ibnd) = rho (ibnd) endif enddo enddo !write(6,*)d0psi(:,2) !write(6,*) g(:,2) !write(6,*) h(:,1) 100 continue kter = kter_eff deallocate (rho, rhoold) deallocate (conv) deallocate (a,c) deallocate (g, t, h, hold) deallocate (gt, tt, ht, htold) deallocate (gtp, gp) call stop_clock ('cgsolve') return END SUBROUTINE cbcg_solve_fix