!------------------------------------------------------------------------------ ! ! This file is part of the Sternheimer-GW code. ! ! Copyright (C) 2010 - 2016 ! Henry Lambert, Martin Schlipf, and Feliciano Giustino ! ! Sternheimer-GW is free software: you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by ! the Free Software Foundation, either version 3 of the License, or ! (at your option) any later version. ! ! Sternheimer-GW is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU General Public License for more details. ! ! You should have received a copy of the GNU General Public License ! along with Sternheimer-GW. If not, see ! http://www.gnu.org/licenses/gpl.html . ! !------------------------------------------------------------------------------ !> Shifted BiCGStab(l) solver for linear equation \f$(A + \sigma I) x = b\f$. !! !! Implements the shifted BiCGStab(l) algorithm according to the paper of !! Frommer, Computing **70**, 87 (2003). The general idea is that the matrix !! \f$A\f$ and \f$A + \sigma I\f$ span the same Krylov subspace. Hence, we can !! solve the linear equation of all linear problems at the cost of a single one. !! MODULE bicgstab_module USE kinds, ONLY: dp IMPLICIT NONE PRIVATE PUBLIC bicgstab !> Variables and arrays used for the seed system. TYPE seed_system_type !> search direction COMPLEX(dp), ALLOCATABLE :: uu(:,:) !> approximate solution to the linear system COMPLEX(dp), ALLOCATABLE :: xx(:) !> residual vector COMPLEX(dp), ALLOCATABLE :: rr(:,:) !> \f$\tilde r_0\f$ of Frommer's algorithm. COMPLEX(dp), ALLOCATABLE :: tilde_r0(:) !> the shift of this system COMPLEX(dp) sigma !> rho contains dot product of residuals COMPLEX(dp) rho !> \f$\rho_{\text{old}}\f$ of Frommer's algorithm COMPLEX(dp) rho_old !> \f$\alpha\f$ of Frommer's algorithm COMPLEX(dp) alpha !> \f$\alpha_{\text{old}}\f$ of Frommer's algorithm COMPLEX(dp) alpha_old !> beta determines step size COMPLEX(dp) beta !> \f$\omega\f$ of Frommer's algorithm COMPLEX(dp) omega !> \f$\gamma\f$ of Frommer's algorithm COMPLEX(dp), ALLOCATABLE :: gamma(:) !> \f$\gamma'\f$ of Frommer's algorithm COMPLEX(dp), ALLOCATABLE :: gamma_p(:) !> \f$\gamma''\f$ of Frommer's algorithm COMPLEX(dp), ALLOCATABLE :: gamma_pp(:) END TYPE seed_system_type !> Variables and arrays used for the shifted system. TYPE shift_system_type !> search direction COMPLEX(dp), ALLOCATABLE :: uu(:,:) !> approximate solution to the linear system COMPLEX(dp), ALLOCATABLE :: xx(:) !> the shift of this system COMPLEX(dp) sigma !> \f$\phi_{\text{old}}^\sigma\f$ of Frommer's algorithm COMPLEX(dp) phi_old !> \f$\phi^\sigma\f$ of Frommer's algorithm COMPLEX(dp) phi !> \f$\phi_{\text{new}}^\sigma\f$ of Frommer's algorithm COMPLEX(dp) phi_new !> \f$\vartheta^\sigma\f$ of Frommer's algorithm COMPLEX(dp) theta !> \f$\alpha^\sigma\f$ of Frommer's algorithm COMPLEX(dp) alpha !> \f$\beta^\sigma\f$ of Frommer's algorithm COMPLEX(dp) beta !> \f$\mu_{ij}\f$ of Frommer's algorithm COMPLEX(dp), ALLOCATABLE :: mu(:) !> \f$\gamma\f$ of Frommer's algorithm COMPLEX(dp), ALLOCATABLE :: gamma(:) !> \f$\gamma'\f$ of Frommer's algorithm COMPLEX(dp), ALLOCATABLE :: gamma_p(:) !> \f$\gamma''\f$ of Frommer's algorithm COMPLEX(dp), ALLOCATABLE :: gamma_pp(:) END TYPE shift_system_type CONTAINS !> Main driver routine of the algorithm. !! !! This subroutine implements the *Algorithm 2* of Frommer's paper. !! SUBROUTINE bicgstab(lmax, threshold, AA, bb, sigma, xx) !> Dimensionality of the GMRES algorithm. INTEGER, INTENT(IN) :: lmax !> Function pointer that applies the linear operator to a vector. INTERFACE SUBROUTINE AA(sigma, xx, Ax) USE kinds, ONLY: dp !> The shift of this system. COMPLEX(dp), INTENT(IN) :: sigma !> The input vector. COMPLEX(dp), INTENT(IN) :: xx(:) !> The operator applied to the vector. COMPLEX(dp), INTENT(OUT) :: Ax(:) END SUBROUTINE AA END INTERFACE !> Right hand side of the linear equation. COMPLEX(dp), INTENT(IN) :: bb(:) !> Shift \f$\sigma\f$ in the linear operator. The first element of the array !! will be used as seed system. The other ones as shifted systems. COMPLEX(dp), INTENT(IN) :: sigma(:) !> Stop when convergence threshold is reached. REAL(dp), INTENT(IN) :: threshold !> On output: the solution of the linear system COMPLEX(dp), INTENT(OUT) :: xx(:,:) !> Size of the right-hand vector. INTEGER vec_size !> loop over shifted systems INTEGER ishift !> Counter for number of iterations. INTEGER iter !> Maximum number of iterations. INTEGER, PARAMETER :: max_iter = 10000 !> Contains the current best guess for the solution of the linear !! system and the corresponding residual in the seed system. TYPE(seed_system_type) seed_system !> Contains the current best guess for the solution of the linear !! system and the corresponding residual in the shifted system. TYPE(shift_system_type) shift_system(SIZE(sigma) - 1) ! determine dimension of vector vec_size = SIZE(bb) ! ! sanity check of the input ! IF (SIZE(xx, 1) /= vec_size) & CALL errore(__FILE__, "right-hand side and solution must have same size", 1) IF (SIZE(xx, 2) /= SIZE(sigma)) & CALL errore(__FILE__, "we need one solution vector per shift for the result", 1) ! ! initialization seed system ! CALL init_seed(lmax, bb, sigma(1), seed_system) ! ! initialization shifted systems ! CALL init_shift(lmax, vec_size, sigma, shift_system) ! loop until solution is found DO iter = 1, max_iter ! ! perform BiCG part (Algorithm 3) ! CALL bicg_part(lmax, AA, seed_system, shift_system) ! stop loop if result is converged IF (converged(threshold, seed_system%rr(:,0))) EXIT ! ! perform MR part (Algorithm 4) ! CALL mr_part(lmax, seed_system, shift_system) ! stop loop if result is converged IF (converged(threshold, seed_system%rr(:,0))) EXIT END DO ! iter IF (iter > max_iter) THEN CALL errore(__FILE__, "BiCGstab algorithm did not converge in given& & number of iterations", max_iter) END IF ! ! copy the result to the output array ! CALL ZCOPY(vec_size, seed_system%xx, 1, xx(:,1), 1) DO ishift = 2, SIZE(sigma) CALL ZCOPY(vec_size, shift_system(ishift - 1)%xx, 1, xx(:,ishift), 1) END DO ! ishift ! ! destroy the allocated array in the types ! CALL destroy_seed(seed_system) CALL destroy_shift(shift_system) END SUBROUTINE bicgstab !> Check if the residual is below the given threshold. FUNCTION converged(threshold, residual) !> If the norm of the residual drops below this threshold, the system is !! considered as converged. REAL(dp), INTENT(IN) :: threshold !> The residual in the seed system of the multishift problem. COMPLEX(dp), INTENT(IN) :: residual(:) !> returns true, if the norm of the residual is smaller than the threshold LOGICAL converged !> size of the vector INTEGER vec_size !> norm of the residual REAL(dp) norm_residual !> BLAS function to evaluate the euclidian norm REAL(dp), EXTERNAL :: DNRM2 ! determine vector size ! note: factor 2 because we want to evaluate the norm of a complex vector vec_size = 2 * SIZE(residual) ! check residual of seed system norm_residual = DNRM2(vec_size, residual, 1) ! if the norm is smaller than the threshold the system is converged converged = (norm_residual < threshold) END FUNCTION converged !> Initialize the seed system. SUBROUTINE init_seed(lmax, bb, sigma, seed_system) !> Dimensionality of the GMRES algorithm. INTEGER, INTENT(IN) :: lmax !> Initial residual (right hand side of equation). COMPLEX(dp), INTENT(IN) :: bb(:) !> The shift of the system that we will use as the seed system. COMPLEX(dp), INTENT(IN) :: sigma !> On output contains the initialized seed system. TYPE(seed_system_type), INTENT(OUT) :: seed_system !> Size of the vectors to initialize. INTEGER vec_size ! determine size of vector vec_size = SIZE(bb) ! allocate arrays of given size ALLOCATE(seed_system%uu(vec_size, 0:lmax)) ALLOCATE(seed_system%xx(vec_size)) ALLOCATE(seed_system%rr(vec_size, 0:lmax)) ALLOCATE(seed_system%tilde_r0(vec_size)) ALLOCATE(seed_system%gamma(lmax)) ALLOCATE(seed_system%gamma_p(lmax)) ALLOCATE(seed_system%gamma_pp(lmax)) ! initialize arrays ! initialize only u0, because the other ones will be set in the algorithm seed_system%uu(:,0) = 0 seed_system%xx = 0 ! initial residual is r0 = (b - A x) = b, because initial x = 0 ! initialize only r0, because the other ones will be set in the algorithm seed_system%rr(:,0) = bb seed_system%tilde_r0 = bb ! init the variables seed_system%sigma = sigma seed_system%rho_old = 1.0 seed_system%alpha_old = 1.0 seed_system%alpha = 0.0 seed_system%omega = 1.0 END SUBROUTINE init_seed !> Deallocate the arrays in the seed system type. SUBROUTINE destroy_seed(seed_system) !> Deallocate the arrays of this system. TYPE(seed_system_type), INTENT(INOUT) :: seed_system ! deallocate the arrays DEALLOCATE(seed_system%uu) DEALLOCATE(seed_system%xx) DEALLOCATE(seed_system%rr) DEALLOCATE(seed_system%tilde_r0) DEALLOCATE(seed_system%gamma) DEALLOCATE(seed_system%gamma_p) DEALLOCATE(seed_system%gamma_pp) END SUBROUTINE destroy_seed !> Initialize the shifted systems. SUBROUTINE init_shift(lmax, vec_size, sigma, shift_system) !> Dimensionality of the GMRES algorithm. INTEGER, INTENT(IN) :: lmax !> Size of the vectors to initialize. INTEGER, INTENT(IN) :: vec_size !> Shifts \f$\sigma\f$ relative to the seed system. COMPLEX(dp), INTENT(IN) :: sigma(:) !> On output contains the initialized shifted systems. TYPE(shift_system_type), INTENT(OUT) :: shift_system(SIZE(sigma) - 1) !> counter over shifted systems INTEGER ishift !> dimension of the array mu INTEGER mu_size !> binomials \f$\begin{pmatrix}j \\ i\end{pmatrix}\f$ REAL(dp), ALLOCATABLE :: binomial(:) !> array to store \f$\sigma^n\f$ for \f$n = 0, \ldots, l_{\text{max}}\f$ COMPLEX(dp), ALLOCATABLE :: sigma_pow(:) !> counter variables to evaluate mu INTEGER ii, jj, offset, ij ! determine size of array mu mu_size = lmax * (lmax + 1) / 2 ! allocate helper arrays ALLOCATE(binomial(mu_size)) ALLOCATE(sigma_pow(0:lmax - 1)) ! / j \ j! ! evaluate binomials ( ) = ----------- ! \ i / i! (j - i)! DO jj = 0, lmax - 1 ! the ij index starts at this value offset = jj * (jj + 1) / 2 + 1 DO ii = 0, jj ! evaluate index in array ij = offset + ii ! trivial case i = 0: binomial = 1 IF (ii == 0) THEN binomial(ij) = 1 ! in general, the following is valid ! / j \ / j \ j - i + 1 ! ( ) = ( ) --------- ! \ i / \ i - 1 / i ELSE binomial(ij) = (binomial(ij - 1) * (jj - ii + 1)) / ii END IF END DO ! i END DO ! j ! loop over all shifted systems DO ishift = 1, SIZE(sigma) - 1 ! allocate arrays of given size ALLOCATE(shift_system(ishift)%uu(vec_size, 0:lmax)) ALLOCATE(shift_system(ishift)%xx(vec_size)) ALLOCATE(shift_system(ishift)%gamma(lmax)) ALLOCATE(shift_system(ishift)%gamma_p(lmax)) ALLOCATE(shift_system(ishift)%gamma_pp(lmax)) ! allocate array for mu ALLOCATE(shift_system(ishift)%mu(mu_size)) ! initialize arrays to 0 ! initialize only u0, because the other ones will be set in the algorithm shift_system(ishift)%uu(:,0) = 0 shift_system(ishift)%xx = 0 ! initialize the variables shift_system(ishift)%phi_old = 1.0 shift_system(ishift)%phi = 1.0 shift_system(ishift)%theta = 1.0 ! subtract the shift of the initial system ! shift_system(ishift)%sigma = sigma(ishift + 1) - sigma(1) shift_system(ishift)%sigma = sigma(1) - sigma(ishift + 1) ! construct sigma_pow array sigma_pow(0) = 1.0 DO ii = 1, lmax - 1 sigma_pow(ii) = shift_system(ishift)%sigma * sigma_pow(ii - 1) END DO ! i ! construct mu_ij DO jj = 0, lmax - 1 ! the ij index starts at this value offset = jj * (jj + 1) / 2 + 1 DO ii = 0, jj ! evaluate index in array ij = offset + ii ! / j \ j - i ! mu_ij = ( ) sigma ! \ i / shift_system(ishift)%mu(ij) = binomial(ij) * sigma_pow(jj - ii) END DO ! i END DO ! j END DO ! ishift DEALLOCATE(binomial) DEALLOCATE(sigma_pow) END SUBROUTINE init_shift !> Deallocate the arrays in the shifted system types. SUBROUTINE destroy_shift(shift_system) !> Deallocate the arrays in all of this types. TYPE(shift_system_type), INTENT(INOUT) :: shift_system(:) !> counter over shifted systems INTEGER ishift ! loop over shifted systems DO ishift = 1, SIZE(shift_system) ! deallocate arrays DEALLOCATE(shift_system(ishift)%uu) DEALLOCATE(shift_system(ishift)%xx) DEALLOCATE(shift_system(ishift)%mu) DEALLOCATE(shift_system(ishift)%gamma) DEALLOCATE(shift_system(ishift)%gamma_p) DEALLOCATE(shift_system(ishift)%gamma_pp) END DO ! ishift END SUBROUTINE destroy_shift !> The BiCG part of the BiCGstab algorithm. !! !! This subroutine implements the *Algorithm 3* of Frommer's paper. !! SUBROUTINE bicg_part(lmax, AA, seed_system, shift_system) !> Dimensionality of the GMRES algorithm. INTEGER, INTENT(IN) :: lmax !> Function pointer that applies the linear operator to a vector. INTERFACE SUBROUTINE AA(sigma, xx, Ax) USE kinds, ONLY: dp !> The shift of this system. COMPLEX(dp), INTENT(IN) :: sigma !> The input vector. COMPLEX(dp), INTENT(IN) :: xx(:) !> The operator applied to the vector. COMPLEX(dp), INTENT(OUT) :: Ax(:) END SUBROUTINE AA END INTERFACE !> Contains the current best guess for the solution of the linear !! system and the corresponding residual in the seed system. Updated !! to the next best guess at the end of the routine. TYPE(seed_system_type), INTENT(INOUT) :: seed_system !> Contains the current best guess for the solution of the linear !! system and the corresponding residual in the shifted system. Updated !! to the next best guess at the end of the routine. TYPE(shift_system_type), INTENT(INOUT), TARGET :: shift_system(:) !> pointer to the currently active shifted system TYPE(shift_system_type), POINTER :: active !> Size of the vectors to initialize. INTEGER vec_size !> Number of shifted systems INTEGER num_shift !> Counter for the shifted systems INTEGER ishift !> counter for previous iterations INTEGER ii !> counter for GMRES iteration INTEGER jj !> helper variable to store factor COMPLEX(dp) factor !> complex 1 COMPLEX(dp), PARAMETER :: one = CMPLX(1, 0) ! BLAS dot product routine COMPLEX(dp), EXTERNAL :: ZDOTU ! determine vector size vec_size = SIZE(seed_system%tilde_r0) ! determine number of shifted systems num_shift = SIZE(shift_system) ! ! Frommer's algorithm 3 ! Note: the L?? refers to the line numbers given in Frommer's paper ! we abbreviate the superscript sigma with ^ ! ! L3: rho_old = - omega * rho_old seed_system%rho_old = - seed_system%omega * seed_system%rho_old ! ! L4: loop over GMRES iterations DO jj = 0, lmax - 1 ! ! seed system ! ! L6: rho = (r_j, ~r_0), seed_system%rho = ZDOTU(vec_size, seed_system%rr(:,jj), 1, seed_system%tilde_r0, 1) ! beta = alpha * rho / rho_old, seed_system%beta = seed_system%alpha * seed_system%rho / seed_system%rho_old ! rho_old = rho seed_system%rho_old = seed_system%rho ! ! L7: loop over previous steps DO ii = 0, jj ! ! L8: u_i = r_i - beta u_i CALL ZSCAL(vec_size, -seed_system%beta, seed_system%uu(:,ii), 1) CALL ZAXPY(vec_size, one, seed_system%rr(:,ii), 1, seed_system%uu(:,ii), 1) ! END DO ! i ! ! L10: u_j+1 = A u_j CALL AA(seed_system%sigma, seed_system%uu(:,jj), seed_system%uu(:, jj + 1)) ! ! L11: alpha = rho / (u_j+1, ~r_0) seed_system%alpha = seed_system%rho & / ZDOTU(vec_size, seed_system%uu(:, jj + 1), 1, & seed_system%tilde_r0, 1) ! ! shifted system ! ! loop over all shifted systems DO ishift = 1, num_shift ! active => shift_system(ishift) ! ! L13: phi_new^ = (1 + alpha sigma) phi^ + alpha beta / alpha_old (phi_old^ - phi^) active%phi_new = (one + seed_system%alpha * active%sigma) * active%phi & + seed_system%alpha * seed_system%beta / seed_system%alpha_old & * (active%phi_old - active%phi) ! beta^ = (phi_old^ / phi^)**2 beta active%beta = (active%phi_old / active%phi)**2 * seed_system%beta ! alpha^ = (phi^ / phi_new^) alpha active%alpha = (active%phi / active%phi_new) * seed_system%alpha ! ! evaluate 1 / (theta^ phi^) used in L15 and L17 factor = 1.0 / (active%theta * active%phi) ! ! L14: loop over previous steps DO ii = 0, jj ! ! L15: u_i^ = r_i / (theta^ phi^) - beta^ u_i^ CALL ZSCAL(vec_size, -active%beta, active%uu(:,ii), 1) CALL ZAXPY(vec_size, factor, seed_system%rr(:,ii), 1, active%uu(:,ii), 1) ! END DO ! i ! ! L17: x^ = x^ + alpha^ u0^ CALL ZAXPY(vec_size, active%alpha, active%uu(:,0), 1, active%xx, 1) ! ! note - we shifted update of alpha after the loop to allow ! for systems with multiple shifts ! L18: phi_old^ = phi^ active%phi_old = active%phi ! phi^ = phi_new^ active%phi = active%phi_new ! ! note: we can use factor here, because phi_old = phi in L18 ! L19: u_j+1^ = r_j / (theta^ phi_old^) CALL ZCOPY(vec_size, seed_system%rr(:,jj), 1, active%uu(:, jj + 1), 1) CALL ZSCAL(vec_size, factor, active%uu(:, jj + 1), 1) ! END DO ! ishift ! ! this update must be done here instead of with the rest of ! line 18, because it acts on the seed_system and would affect ! the other shifted systems otherwise ! L18: alpha_old = alpha seed_system%alpha_old = seed_system%alpha ! ! L21: loop over previous iterations DO ii = 0, jj ! ! L22: r_i = r_i - alpha u_i+1 CALL ZAXPY(vec_size, -seed_system%alpha, seed_system%uu(:, ii + 1), 1, & seed_system%rr(:,ii), 1) ! END DO ! i ! ! L24: r_j+1 = A r_j CALL AA(seed_system%sigma, seed_system%rr(:,jj), seed_system%rr(:, jj + 1)) ! ! L25: x = x + alpha u0 CALL ZAXPY(vec_size, seed_system%alpha, seed_system%uu(:,0), 1, seed_system%xx, 1) ! ! now update shifted systems again DO ishift = 1, num_shift ! active => shift_system(ishift) ! ! L26: u_j+1^ = (u_j+1^ - r_j / (theta^ phi^)) / alpha^ - sigma u_j^ factor = -1.0 / (active%theta * active%phi) CALL ZAXPY(vec_size, factor, seed_system%rr(:,jj), 1, active%uu(:, jj + 1), 1) CALL ZSCAL(vec_size, 1.0 / active%alpha, active%uu(:, jj + 1), 1) CALL ZAXPY(vec_size, -active%sigma, active%uu(:,jj), 1, active%uu(:, jj + 1), 1) ! END DO ! ishift ! END DO ! j END SUBROUTINE bicg_part !> The MR part of the BiCGstab algorithm. !! !! This subroutine implements the *Algorithm 4* of Frommer's paper. !! SUBROUTINE mr_part(lmax, seed_system, shift_system) !> Dimensionality of the GMRES algorithm. INTEGER, INTENT(IN) :: lmax !> Contains the current best guess for the solution of the linear !! system and the corresponding residual in the seed system. Updated !! to the next best guess at the end of the routine. TYPE(seed_system_type), INTENT(INOUT) :: seed_system !> Contains the current best guess for the solution of the linear !! system and the corresponding residual in the shifted system. Updated !! to the next best guess at the end of the routine. TYPE(shift_system_type), INTENT(INOUT), TARGET :: shift_system(:) !> pointer to the currently active shifted system TYPE(shift_system_type), POINTER :: active !> Size of the vectors to initialize. INTEGER vec_size !> Number of shifted systems INTEGER num_shift !> Counter for the shifted systems INTEGER ishift !> counter for previous iterations INTEGER ii !> counter for GMRES iteration INTEGER jj !> helper to access the right element INTEGER offset, ij !> Temporary storage for prefactors COMPLEX(dp) factor !> The variable \f$\xi\f$ of Frommer's algorithm. COMPLEX(dp) xi !> The variable \f$\psi\f$ of Frommer's algorithm. COMPLEX(dp) psi !> The vector \f$\nu\f$ of Frommer's algorithm. COMPLEX(dp), ALLOCATABLE :: nu(:) !> The array \f$\tau\f$ of Frommer's algorithm. COMPLEX(dp), ALLOCATABLE :: tau(:) ! BLAS dot product routine COMPLEX(dp), EXTERNAL :: ZDOTU ! allocate vectors of appropriate size ALLOCATE(nu(lmax)) ALLOCATE(tau((lmax + 1) * (lmax + 2) / 2)) ! determine vector size vec_size = SIZE(seed_system%tilde_r0) ! determine number of shifted systems num_shift = SIZE(shift_system) ! ! Frommer's algorithm 4 ! Note: the L?? refers to the line numbers given in Frommer's paper ! we abbreviate the superscript sigma with ^ ! ! modified Gram-Schmidt orthogonalization ! ! L1: loop over GMRES iterations DO jj = 1, lmax ! offset = jj * (jj + 1) / 2 + 1 ! ! L2: loop over previous iterations DO ii = 1, jj - 1 ! ij = offset + ii ! ! L3: tau_ij = (r_j, r_i) / nu_i tau(ij) = ZDOTU(vec_size, seed_system%rr(:,jj), 1, seed_system%rr(:,ii), 1) / nu(ii) ! r_j = r_j - tau_ij r_i CALL ZAXPY(vec_size, -tau(ij), seed_system%rr(:,ii), 1, seed_system%rr(:,jj), 1) ! END DO ! i ! ! L5: nu_j = (r_j, r_j) nu(jj) = ZDOTU(vec_size, seed_system%rr(:,jj), 1, seed_system%rr(:,jj), 1) ! gamma_j' = (r_0, r_j) / nu(j) seed_system%gamma_p(jj) = ZDOTU(vec_size, seed_system%rr(:,0), 1, seed_system%rr(:,jj), 1) / nu(jj) ! END DO ! j ! ! L7: gamma_l = gamma_l' seed_system%gamma(lmax) = seed_system%gamma_p(lmax) ! omega = gamma_l seed_system%omega = seed_system%gamma(lmax) ! ! L8: sum over tau gamma DO jj = lmax - 1, 1, -1 ! ! L9: gamma_j = gamma_j' - sum_{i = j + 1}^l tau_ji gamma_i seed_system%gamma(jj) = seed_system%gamma_p(jj) DO ii = jj + 1, lmax ij = ii * (ii + 1) / 2 + jj + 1 seed_system%gamma(jj) = seed_system%gamma(jj) - tau(ij) * seed_system%gamma(ii) END DO ! i ! END DO ! j ! ! L11: sum over tau gamma DO jj = 1, lmax - 1 ! ! L12: gamma_j" = gamma_j+1 + sum_{i = j + 1}^{l-1} tau_ji gamma_i+1 seed_system%gamma_pp(jj) = seed_system%gamma(jj + 1) DO ii = jj + 1, lmax - 1 ij = ii * (ii + 1) / 2 + jj + 1 seed_system%gamma_pp(jj) = seed_system%gamma_pp(jj) + tau(ij) * seed_system%gamma(ii + 1) END DO ! i END DO ! j ! ! update step ! L14: x = x + gamma_1 r_0 CALL ZAXPY(vec_size, seed_system%gamma(1), seed_system%rr(:,0), 1, seed_system%xx, 1) ! u_0 = u_0 - gamma_l u_l CALL ZAXPY(vec_size, -seed_system%gamma(lmax), seed_system%uu(:,lmax), 1, seed_system%uu(:,0), 1) ! ! L15: loop over GMRES iterations DO jj = 1, lmax - 1 ! ! L16: u_0 = u_0 - gamma_j u_j CALL ZAXPY(vec_size, -seed_system%gamma(jj), seed_system%uu(:,jj), 1, seed_system%uu(:,0), 1) ! ! L17: x = x + gamma_j" r_j CALL ZAXPY(vec_size, seed_system%gamma_pp(jj), seed_system%rr(:,jj), 1, seed_system%xx, 1) ! END DO ! j ! ! shifted systems ! DO ishift = 1, num_shift ! active => shift_system(ishift) ! ! L20: call Horner's scheme to obtain psi^ and gamma_j^ CALL horner_scheme(seed_system%gamma, active%sigma, active%gamma, psi) ! ! L21: xi = theta^ phi^ xi = active%theta * active%phi ! theta^ = theta^ psi active%theta = active%theta * psi ! ! L22: loop over GMRES iterations DO jj = 1, lmax ! ! L23: gamma_j'^ = sum_{i = j}^l mu_{j - 1, i - 1} gamma_i^ active%gamma_p(jj) = 0 DO ii = jj, lmax ij = (ii - 1) * ii / 2 + jj active%gamma_p(jj) = active%gamma_p(jj) + active%mu(ij) * active%gamma(ii) END DO ! i ! END DO ! j ! ! L25: loop over GMRES iterations DO jj = 1, lmax - 1 ! ! L26: gamma_j"^ = gamma_j+1'^ + sum_{i = j + 1}^{l - 1} tau_ji gamma_i+1'^ active%gamma_pp(jj) = active%gamma_p(jj + 1) DO ii = jj + 1, lmax - 1 ij = ii * (ii + 1) / 2 + jj + 1 active%gamma_pp(jj) = active%gamma_pp(jj) + tau(ij) * active%gamma_p(ii + 1) END DO ! i ! END DO ! j ! ! note: the residuum update is done later, outside of the loop ! L28: x^ = x^ + gamma_1'^ / xi r_0 factor = active%gamma_p(1) / xi CALL ZAXPY(vec_size, factor, seed_system%rr(:,0), 1, active%xx, 1) ! u_0^ = u_0^ - gamma_l u_l^ ! note: the latter equation is incorrect in Frommer's paper ! where u_l instead of u_l^ is used CALL ZAXPY(vec_size, -seed_system%gamma(lmax), active%uu(:,lmax), 1, & active%uu(:,0), 1) ! ! L29: loop over GMRES iterations DO jj = 1, lmax - 1 ! ! L30: u_0^ = u_0^ - gamma_j u_j^ CALL ZAXPY(vec_size, -seed_system%gamma(jj), active%uu(:,jj), 1, & active%uu(:,0), 1) ! ! L31: x^ = x^ + (gamma_j"^ / xi) r_j factor = active%gamma_pp(jj) / xi CALL ZAXPY(vec_size, factor, seed_system%rr(:,jj), 1, active%xx, 1) ! END DO ! j ! ! L34: u_0^ = u_0^ / psi factor = 1 / psi CALL ZSCAL(vec_size, factor, active%uu(:,0), 1) ! END DO ! ishift ! ! now do the delayed residuum update ! note that the effect of L28 is to shift the upper boundary of the loop ! ! L29: loop over GMRES iterations DO jj = 1, lmax ! ! L32: r_0 = r_0 - gamma_j' r_j CALL ZAXPY(vec_size, -seed_system%gamma_p(jj), seed_system%rr(:,jj), 1, & seed_system%rr(:,0), 1) ! END DO ! j ! free memory DEALLOCATE(tau) DEALLOCATE(nu) END SUBROUTINE mr_part !> The Horner's scheme part of the BiCGstab algorithm. !! !! This subroutine implements the *Algorithm 5* of Frommer's paper. !! !! Computes \f$\gamma^\sigma_j$ and $\psi^\sigma$ for the shifted !! system from the values in the seed system and the shift !! \f$\sigma\f$. !! SUBROUTINE horner_scheme(seed_gamma, sigma, shift_gamma, psi) !> The \f$gamma\f$ values in the seed system. COMPLEX(dp), INTENT(IN) :: seed_gamma(:) !> The shift relative to the seed system. COMPLEX(dp), INTENT(IN) :: sigma !> The \f$\gamma^\sigma\f$ values in the shifted system. COMPLEX(dp), INTENT(OUT) :: shift_gamma(:) !> The \f$\psi^\sigma\f$ value for the shifted system. COMPLEX(dp), INTENT(OUT) :: psi !> dimension of the gamma arrays INTEGER lmax !> loop variables INTEGER ii, jj !> determine size of the array lmax = SIZE(seed_gamma) ! ! Frommer's algorithm 5 ! Note: we abbreviate the superscript sigma with ^ ! ! gamma_l^ = -gamma_l shift_gamma(lmax) = -seed_gamma(lmax) ! ! loop over GMRES iterations DO jj = lmax - 1, 1, -1 ! ! gamma_j^ = -sigma gamma_j+1^ - gamma_j shift_gamma(jj) = -sigma * shift_gamma(jj + 1) - seed_gamma(jj) ! END DO ! j ! ! psi^ = -sigma gamma_1^ + 1 psi = -sigma * shift_gamma(1) + 1 ! DO ii = 1, lmax - 1 DO jj = lmax - 1, ii, -1 ! ! gamma_j^ = -sigma gamma_j+1^ + gamma_j^ shift_gamma(jj) = -sigma * shift_gamma(jj + 1) + shift_gamma(jj) ! END DO ! j END DO ! i ! ! loop over GMRES iterations DO jj = 1, lmax ! ! gamma_j^ = -gamma_j^ / psi^ shift_gamma(jj) = -shift_gamma(jj) / psi ! END DO ! j END SUBROUTINE horner_scheme END MODULE bicgstab_module