!
  !-----------------------------------------------------------------------
  subroutine bcgsolve_all (Ax, e, b, x, h_diag, &
     ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd, g2kin, vr, evq)
  !-----------------------------------------------------------------------
  !
  !  rewritten by FG following the book
  !  "Templates for the solution of linear systems: building blocks
  !  for iterative methods", by R. Barret et al, SIAM (http://www.siam.org/books)
  !  pag. 13. The only changes are:
  !  1. the product r^T*z on line 4 has been rewritten as z^T*r
  !  t obe consistent with the BiConjugate gradient method on pag. 20
  !  (look for [*] below).
  !  2. Instead of introducing p in the if/then/else, we overwrite z.
  !  Therefore our z at some point corresponds to their p.
  !
  !-----------------------------------------------------------------------
  !
  !     iterative solution of the linear system:
  !
  !                 ( h - e + Q ) * x = b                      (1)
  !
  !                 where h is a complex hermitean matrix, e is a real sca
  !                 x and b are complex vectors
  !
  !     on input:
  !                 Ax    EXTERNAL  name of a subroutine:
  !                          Ax(ndim,psi,psip)
  !                          Calculates  H*psi products.
  !                          Vectors psi and psip should be dimensined
  !                          (ndmx,nvec). nvec=1 is used!
  !
  !                 cg_psi   EXTERNAL  name of a subroutine:
  !                          g_psi(ndmx,ndim,notcnv,psi,e)
  !                          which calculates (h-e)^-1 * psi, with
  !                          some approximation, e.g. (diag(h)-e)
  !
  !                 e        real     unperturbed eigenvalue.
  !
  !                 x     contains an estimate of the solution
  !                          vector.
  !
  !                 b    contains the right hand side vector
  !                          of the system.
  !
  !                 ndmx     integer row dimension of x, ecc.
  !
  !                 ndim     integer actual row dimension of x
  !
  !                 ethr     real     convergence threshold. solution
  !                          improvement is stopped when the error in
  !                          eq (1), defined as l.h.s. - r.h.s., becomes
  !                          less than ethr in norm.
  !
  !     on output:  x     contains the refined estimate of the
  !                          solution vector.
  !
  !                 b    is corrupted on exit
  !
  !   revised (extensively)       6 Apr 1997 by A. Dal Corso & F. Mauri
  !   revised (to reduce memory) 29 May 2004 by S. de Gironcoli
  !
  !-----------------------------------------------------------------------
  !
  use parameters, only : DP, nbnd_occ
  use gspace, only : nr, ngm
  use constants
  implicit none
  integer :: i
  complex(DP) :: vr(nr)
  real(DP) :: g2kin (ngm)
  complex(kind=DP) :: evq (ngm, nbnd_occ)
  !
  !   first the I/O variables
  !
  integer :: ndmx, & ! input: the maximum dimension of the vectors
             ndim, & ! input: the actual dimension of the vectors
             kter, & ! output: counter on iterations
             nbnd, & ! input: the number of bands
             ik      ! input: the k point

  real(kind=DP) :: &
             e(nbnd), &        ! input: the actual eigenvalue
             anorm,   &        ! output: the norm of the error in the solution
             h_diag(ndmx,nbnd), & ! input: preconditioner
             ethr               ! input: the required precision
  complex(kind=DP) :: &
             x (ndmx, nbnd), & ! output: the solution of the linear syst
             b (ndmx, nbnd)    ! input: the known term
  logical :: conv_root         ! output: if true the root is converged
  external Ax                  ! input: the routine computing Ax
  !
  ! local variables
  !
  integer, parameter :: maxter = 200
  integer :: iter, ibnd, lbnd
  complex(kind=DP), allocatable :: r  (:,:), q  (:,:), z  (:,:), zold  (:,:)
  complex(kind=DP), allocatable :: rt (:,:), qt (:,:), zt (:,:), ztold (:,:)
  complex(kind=DP) :: a, c, beta, alpha, ZDOTC
  complex(kind=DP) :: rho (nbnd), rhoold (nbnd)
  real(kind=DP) :: eu (nbnd)
  integer :: conv (nbnd)
  real(kind=DP) :: kter_eff
  !
  allocate ( r (ndmx,nbnd), q (ndmx,nbnd), z (ndmx,nbnd), zold (ndmx ,nbnd) )    
  allocate ( rt(ndmx,nbnd), qt(ndmx,nbnd), zt(ndmx,nbnd), ztold(ndmx ,nbnd) )    
  !
  kter_eff = zero
  conv = 0
  !
  do iter = 1, maxter
     !
     ! the residuals r and r~ (=r at the first iter)
     !
     if (iter .eq. 1) then
   !@
        call Ax ( x, r , e, ik, nbnd, g2kin, vr, evq, +1)
        call Ax ( x, r , e, ik, nbnd, g2kin, vr, evq, 0)  
   !@
        do ibnd = 1, nbnd
           call ZAXPY (ndim, - cone, b(1,ibnd), 1, r (1,ibnd), 1)
           call DSCAL (2 * ndim, - one, r  (1, ibnd), 1)
        enddo
        !
        rt = r
        !
     endif
     !
     ! preconditioned residual vectors z and z~ 
     ! (note that the preconditioner is diagonal, then Hermitian)
     !
     lbnd = 0
     do ibnd = 1, nbnd
        if (conv (ibnd) .eq.0) then
           lbnd = lbnd+1
           call ZCOPY (ndim, r  (1, ibnd), 1, z  (1, ibnd), 1)
           call ZCOPY (ndim, rt (1, ibnd), 1, zt (1, ibnd), 1)
           do i = 1, ndim
             z  (i, ibnd) = z  (i, ibnd) * h_diag (i, ibnd)
             zt (i, ibnd) = zt (i, ibnd) * h_diag (i, ibnd)
           enddo
         rho(lbnd) = ZDOTC (ndim, z(1,ibnd), 1, rt(1,ibnd), 1)  ! [*]
  !@@    rho(lbnd) = ZDOTC (ndim, zt(1,ibnd), 1, r(1,ibnd), 1)  ! [*]
        endif
     enddo
     !
     ! convergence check
     !
     kter_eff = kter_eff + float (lbnd) / float (nbnd)
     do ibnd = nbnd, 1, -1
        if (conv(ibnd).eq.0) then
           rho(ibnd) = rho(lbnd)
           lbnd = lbnd -1
           anorm = sqrt ( abs ( rho (ibnd) ) )
           if (anorm.lt.ethr) conv (ibnd) = 1
        endif
     enddo
     conv_root = .true.
     do ibnd = 1, nbnd
        conv_root = conv_root .and. (conv (ibnd) .eq.1)
     enddo
     if (conv_root) goto 100
     !
     ! compute the step direction z
     !
     lbnd = 0
     do ibnd = 1, nbnd
        if (conv (ibnd) .eq.0) then
           !
           if (iter.ne.1) then
              beta = rho (ibnd) / rhoold (ibnd)
              call ZAXPY (ndim, beta, zold  (1, ibnd), 1, z  (1, ibnd), 1)
              call ZAXPY (ndim, beta, ztold (1, ibnd), 1, zt (1, ibnd), 1)
           endif
           !
           ! here zold, ztold are used as auxiliary vectors 
           ! in order to efficiently compute q = A*z and qt = A*zt
           ! they are later set to the current (becoming old) values of z, zt 
           !
           lbnd = lbnd + 1
           call ZCOPY (ndim, z  (1, ibnd), 1, zold  (1, lbnd), 1)
           call ZCOPY (ndim, zt (1, ibnd), 1, ztold (1, lbnd), 1)
           eu (lbnd) = e (ibnd)
        endif
     enddo
     !
     ! q = A*z, qt = A*zt
     !
  !@@@
   ! call Ax ( zold , q , eu, ik, lbnd, g2kin, vr, evq, +1)
   ! call Ax ( ztold, qt, eu, ik, lbnd, g2kin, vr, evq, -1)
     call Ax ( zold , q , eu, ik, lbnd, g2kin, vr, evq, 0)
     call Ax ( ztold, qt, eu, ik, lbnd, g2kin, vr, evq, 0)
  !@@@
     !
     lbnd=0
     do ibnd = 1, nbnd
        if (conv (ibnd) .eq.0) then
           lbnd=lbnd+1
           !
           a = ZDOTC (ndim, z (1,ibnd), 1, rt(1,ibnd), 1)
           c = ZDOTC (ndim, zt(1,ibnd), 1, q (1,lbnd), 1)
           alpha = a / c
           !
           call ZAXPY (ndim,   alpha, z (1,ibnd), 1, x (1,ibnd), 1)
           call ZAXPY (ndim, - alpha, q (1,lbnd), 1, r (1,ibnd), 1)
           call ZAXPY (ndim, - alpha, qt(1,lbnd), 1, rt(1,ibnd), 1)
           !
           !    save current (now old) z and rho for later use
           ! 
           call ZCOPY (ndim, z (1,ibnd), 1, zold (1,ibnd), 1)
           call ZCOPY (ndim, zt(1,ibnd), 1, ztold(1,ibnd), 1)
           rhoold (ibnd) = rho (ibnd)
        endif
     enddo
  enddo
100 continue
  kter = kter_eff
  deallocate (r , q , z , zold )
  deallocate (rt, qt, zt, ztold)
  !
  return
  end subroutine bcgsolve_all
  !