!
! Copyright (C) 2001-2004 PWSCF group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#include "f_defs.h"
!
!----------------------------------------------------------------------------
MODULE para_const
  !----------------------------------------------------------------------------
  !
  SAVE
  !
  INTEGER, PARAMETER :: &
      maxproc = 128      ! maximum number of processors
  !
END MODULE para_const
!
!
!----------------------------------------------------------------------------
MODULE pfft
  !----------------------------------------------------------------------------
  !
  ! ... parallel fft information for the dense grid
  !
  USE para_const
  !
  SAVE
  !
  INTEGER :: &
      npp(maxproc),  &! number of planes per processor
      ncp(maxproc),  &! number of (density) columns per processor
      ncp0(maxproc), &! starting column for each processor
      ncplane,       &! number of columns in a plane
      nct,           &! total number of non-zero columns
      nxx             ! local fft data dimension
  !
END MODULE pfft
!
!
!----------------------------------------------------------------------------
MODULE pffts
  !----------------------------------------------------------------------------
  !
  ! ... parallel fft information for the smooth grid
  !
  USE para_const
  !
  SAVE
  !
  INTEGER :: &
      nkcp(maxproc)    ! number of (wfs) columns per processor
  INTEGER :: &
      npps(maxproc),  &! number of planes per processor
      ncps(maxproc),  &! number of (density) columns per processor
      ncp0s(maxproc), &! starting column for each processor
      ncplanes,       &! number of columns in a plane
      ncts,           &! total number of non-zero columns
      nxxs             ! local fft data dimension
  !
END MODULE pffts
!
!
!----------------------------------------------------------------------------
MODULE para
  !----------------------------------------------------------------------------
  !
  ! ... this module contains data and methods needed for the parallel
  ! ... version of the PWscf code
  !
  USE pfft
  USE pffts
  !@@ USE kinds, ONLY : DP
  USE parameters, ONLY : DP
  !
  SAVE
  !
  ! ... number of processors = # of tasks
  !
  ! ... general parallel information
  !
  INTEGER :: &
      npool  = 1, &! number of pools
      nprocp = 1, &! number of processors in this task pool
      mypool = 1, &! identifier of this task pool
      me     = 1, &! identifier of this task within its pool
      kunit  = 1   ! granularity of k-point distribution
  !
END MODULE para
!
! ... here are all parallel subroutines (wrappers to MPI calls) used
! ... by the PWscf code
!
! ... "reduce"-like subroutines
!
!----------------------------------------------------------------------------
SUBROUTINE reduce( dim, ps )
  !----------------------------------------------------------------------------
  !
  ! ... sums a distributed variable ps(dim) over the processors.
  ! ... This version uses a fixed-length buffer of appropriate (?) dim
  ! ... uses SHMEM for the T3D/T3E, MPI otherwise
  !
#if defined (__PARA)
  !
  USE mp_global,  ONLY : intra_pool_comm, my_pool_id, nproc_pool, npool
  USE mp,         ONLY : mp_barrier
  !@@ USE kinds, ONLY : DP
  USE parameters, ONLY : DP
  USE parallel_include
# if defined (__SHMEM)
  USE para,       ONLY : nprocp
# endif
  !
  IMPLICIT NONE
  !
  INTEGER            :: dim
  REAL (KIND=DP)     :: ps(dim)
  INTEGER            :: info, n, nbuf
  INTEGER, PARAMETER :: maxb = 10000
# if (defined __SHMEM && defined __ALTIX) || (defined __SHMEM && defined __ORIGIN)
  INTEGER            :: sym_len
  LOGICAL            :: first
  REAL (KIND=DP)     :: buff(*), snd_buff(*)
  POINTER               (buff_p, buff), (snd_buff_p, snd_buff)
  COMMON /sym_heap1/ buff_p, snd_buff_p, sym_len, first
#else
  REAL (KIND=DP)     :: buff(maxb)
# endif
  !
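  ! ... the reduction below is carried out in chunks of at most maxb
  ! ... elements, so that buff never needs to be larger than maxb.
  ! ... Purely as an illustration: with dim = 25000 and maxb = 10000,
  ! ... two full chunks are summed in the main DO loop and the remaining
  ! ... 5000 elements are handled by the "remaining elements" branch.
  !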
# if defined (__SHMEM)
  !
  ! ... SHMEM specific
  !
  INCLUDE 'mpp/shmem.fh'
# if defined (__ALTIX) || defined (__ORIGIN)
  INTEGER :: pWrkSync(SHMEM_REDUCE_SYNC_SIZE), &
       &     pWrkData(1024*1024), start
  DATA pWrkSync /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
  DATA pWrkData / 1048576 * 0 /
# else
  INTEGER :: pWrkSync, pWrkData, start
  COMMON / SH_SYNC / pWrkSync(SHMEM_BARRIER_SYNC_dim)
  COMMON / SH_DATA / pWrkData(1024*1024)
  DATA pWrkData / 1048576 * 0 /
  DATA pWrkSync / SHMEM_BARRIER_SYNC_dim * SHMEM_SYNC_VALUE /
!DIR$ CACHE_ALIGN /SH_SYNC/
!DIR$ CACHE_ALIGN /SH_DATA/
# endif
  !
# endif
  !
  !
  IF ( dim <= 0 .OR. nproc_pool <= 1 ) RETURN
  !
  !@ CALL start_clock( 'reduce' )
  !
  ! ... synchronize processes - maybe unneeded on T3D but necessary on T3E !!!
  !
  CALL mp_barrier( intra_pool_comm )
  !
  nbuf = dim / maxb
  !
# if defined (__SHMEM)
# if defined (__ALTIX) || defined (__ORIGIN)
  IF ( dim .GT. sym_len ) THEN
     IF ( sym_len .NE. 0 ) THEN
        CALL shpdeallc( snd_buff_p, info, -1 )
     END IF
     sym_len = dim
     CALL shpalloc( snd_buff_p, 2*sym_len, info, -1 )
  END IF
  IF ( .NOT. first ) THEN
     CALL shpalloc( buff_p, 2*maxb, info, -1 )
     first = .TRUE.
  END IF
  snd_buff(1:dim) = ps(1:dim)
# endif
  !
  start = my_pool_id * nproc_pool
  !
# endif
  !
  DO n = 1, nbuf
     !
# if defined (__SHMEM)
     !
# if defined (__ALTIX) || defined (__ORIGIN)
     CALL SHMEM_REAL8_SUM_TO_ALL( buff, snd_buff(1+(n-1)*maxb), &
                                  maxb, start, 0, nprocp, pWrkData, pWrkSync )
# else
     CALL SHMEM_REAL8_SUM_TO_ALL( buff, ps(1+(n-1)*maxb), &
                                  maxb, start, 0, nprocp, pWrkData, pWrkSync )
#endif
     !
# else
     !
     CALL MPI_ALLREDUCE( ps(1+(n-1)*maxb), buff, maxb, MPI_REAL8, &
                         MPI_SUM, intra_pool_comm, info )
     !
     CALL errore( 'reduce', 'error in allreduce1', info )
     !
# endif
     !
     ps((1+(n-1)*maxb):(n*maxb)) = buff(1:maxb)
     !
  END DO
  !
  ! ... possible remaining elements < maxb
  !
  IF ( ( dim - nbuf * maxb ) > 0 ) THEN
     !
# if defined (__SHMEM)
     !
# if defined (__ALTIX) || defined (__ORIGIN)
     CALL SHMEM_REAL8_SUM_TO_ALL( buff, snd_buff(1+nbuf*maxb), &
          &                       (dim-nbuf*maxb), start, 0, nprocp, &
          &                       pWrkData, pWrkSync )
# else
     CALL SHMEM_REAL8_SUM_TO_ALL( buff, ps(1+nbuf*maxb), (dim-nbuf*maxb), &
                                  start, 0, nprocp, pWrkData, pWrkSync )
# endif
     !
# else
     !
     CALL MPI_ALLREDUCE( ps(1+nbuf*maxb), buff, (dim-nbuf*maxb), MPI_REAL8, &
                         MPI_SUM, intra_pool_comm, info )
     !
     CALL errore( 'reduce', 'error in allreduce2', info )
     !
# endif
     !
     ps((1+nbuf*maxb):dim) = buff(1:(dim-nbuf*maxb))
     !
  END IF
  !
  !@ CALL stop_clock( 'reduce' )
  !
#endif
  !
  RETURN
  !
END SUBROUTINE reduce
!
!----------------------------------------------------------------------------
SUBROUTINE ireduce( dim, is )
  !----------------------------------------------------------------------------
  !
  ! ... sums a distributed variable is(dim) over the processors.
  !
#if defined (__PARA)
  !
  USE mp_global, ONLY : intra_pool_comm, nproc_pool, npool
  USE mp,        ONLY : mp_barrier
  USE parallel_include
  !
  IMPLICIT NONE
  !
  INTEGER            :: dim, is(dim)
  INTEGER            :: info, n, m, nbuf
  INTEGER, PARAMETER :: maxi = 500
  INTEGER            :: buff(maxi)
  !
  !
  IF ( dim <= 0 .OR. nproc_pool <= 1 ) RETURN
  !
  ! ... synchronize processes
  !
  CALL mp_barrier( intra_pool_comm )
  !
  nbuf = dim / maxi
  !
  DO n = 1, nbuf
     !
     CALL MPI_ALLREDUCE( is(1+(n-1)*maxi), buff, maxi, MPI_INTEGER, &
                         MPI_SUM, intra_pool_comm, info )
     !
     CALL errore( 'ireduce', 'error in allreduce 1', info )
     !
     is((1+(n-1)*maxi):(n*maxi)) = buff(1:maxi)
     !
  END DO
  !
  ! ... possible remaining elements < maxi
  !
  IF ( ( dim - nbuf * maxi ) > 0 ) THEN
     !
     CALL MPI_ALLREDUCE( is(1+nbuf*maxi), buff, (dim-nbuf*maxi), MPI_INTEGER, &
                         MPI_SUM, intra_pool_comm, info )
     !
     CALL errore( 'ireduce', 'error in allreduce 2', info )
     !
     is((1+nbuf*maxi):dim) = buff(1:(dim-nbuf*maxi))
     !
  END IF
  !
#endif
  !
  RETURN
  !
END SUBROUTINE ireduce
!
!------------------------------------------------------------------------
SUBROUTINE poolreduce( dim, ps )
  !-----------------------------------------------------------------------
  !
  ! ... Sums a distributed variable ps(dim) over the pools.
  ! ... This MPI-only version uses a fixed-length buffer
  !
#if defined (__PARA)
  !
  USE mp_global,  ONLY : inter_pool_comm, intra_image_comm, &
                         my_pool_id, nproc_pool, npool
  USE mp,         ONLY : mp_barrier
  !@@ USE kinds, ONLY : DP
  USE parameters, ONLY : DP
  USE parallel_include
  !
  IMPLICIT NONE
  !
  INTEGER            :: dim
  REAL (KIND=DP)     :: ps(dim)
  INTEGER, PARAMETER :: maxb = 10000
  REAL (KIND=DP)     :: buff(maxb)
  INTEGER            :: info, nbuf, n
  !
  !
  IF ( dim <= 0 .OR. npool <= 1 ) RETURN
  !
  !@ CALL start_clock( 'poolreduce' )
  !
  ! ... MPI synchronize processes
  !
  CALL mp_barrier( intra_image_comm )
  !
  nbuf = dim / maxb
  !
  DO n = 1, nbuf
     !
     CALL MPI_ALLREDUCE( ps(1+(n-1)*maxb), buff, maxb, MPI_REAL8, &
                         MPI_SUM, inter_pool_comm, info )
     !
     CALL errore( 'poolreduce', 'info<>0 at allreduce1', info )
     !
     ps((1+(n-1)*maxb):(n*maxb)) = buff(1:maxb)
     !CALL DCOPY( maxb, buff, 1, ps(1+(n-1)*maxb), 1 )
     !
  END DO
  !
  IF ( ( dim - nbuf * maxb ) > 0 ) THEN
     !
     CALL MPI_ALLREDUCE( ps(1+nbuf*maxb), buff, (dim-nbuf*maxb), MPI_REAL8, &
                         MPI_SUM, inter_pool_comm, info )
     !
     CALL errore( 'poolreduce', 'info<>0 at allreduce2', info )
     !
     ps((1+nbuf*maxb):dim) = buff(1:(dim-nbuf*maxb))
     !CALL DCOPY( dim-nbuf*maxb, buff, 1, ps(1+nbuf*maxb), 1 )
     !
  END IF
  !
  !@ CALL stop_clock( 'poolreduce' )
  !
#endif
  !
  RETURN
  !
END SUBROUTINE poolreduce
!
! ... "gather"-like subroutines
!
!----------------------------------------------------------------------------
SUBROUTINE gather( f_in, f_out )
  !----------------------------------------------------------------------------
  !
  ! ... gathers nproc_pool distributed data chunks onto the first processor
  ! ... of every pool
  !
  ! ... REAL*8  f_in  = distributed variable (nxx)
  ! ... REAL*8  f_out = gathered variable (nrx1*nrx2*nrx3)
  !
#if defined (__PARA)
  !
  USE pfft,       ONLY : ncplane, npp, nxx
  USE mp_global,  ONLY : intra_pool_comm, nproc_pool, me_pool, root_pool
  USE mp,         ONLY : mp_barrier
  !@@ USE kinds, ONLY : DP
  USE parameters, ONLY : DP
  USE parallel_include
  !
  IMPLICIT NONE
  !
  REAL (KIND=DP) :: f_in(nxx), f_out(*)
  INTEGER        :: proc, info
  INTEGER        :: displs(0:nproc_pool-1), recvcount(0:nproc_pool-1)
  !
  !
  !@ CALL start_clock( 'gather' )
  !
  DO proc = 0, ( nproc_pool - 1 )
     !
     recvcount(proc) = ncplane * npp(proc+1)
     !
     IF ( proc == 0 ) THEN
        !
        displs(proc) = 0
        !
     ELSE
        !
        displs(proc) = displs(proc-1) + recvcount(proc-1)
        !
     END IF
     !
  END DO
  !
  CALL mp_barrier( intra_pool_comm )
  !
  CALL MPI_GATHERV( f_in, recvcount(me_pool), MPI_REAL8, f_out, &
                    recvcount, displs, MPI_REAL8, root_pool,    &
                    intra_pool_comm, info )
  !
  CALL errore( 'gather', 'info<>0', info )
  !
  !@ CALL stop_clock( 'gather' )
  !
#endif
  !
  RETURN
  !
END SUBROUTINE gather
!
!-----------------------------------------------------------------------
SUBROUTINE cgather_sym( f_in, f_out )
  !-----------------------------------------------------------------------
  !
  ! ... gather complex data for symmetrization (in phonon code)
  ! ... COMPLEX*16  f_in  = distributed variable (nrxx)
  ! ... COMPLEX*16  f_out = gathered variable (nrx1*nrx2*nrx3)
  !
#if defined (__PARA)
  !
  USE pfft,       ONLY : ncplane, npp, nxx
  USE mp_global,  ONLY : intra_pool_comm, intra_image_comm, &
                         nproc_pool, me_pool
  USE mp,         ONLY : mp_barrier
  USE parameters, ONLY : DP
  USE parallel_include
  !
  IMPLICIT NONE
  !
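  ! ... the counts and displacements passed to MPI below are expressed in
  ! ... units of REAL(DP) words, hence the factor 2 for each complex element
  !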
  COMPLEX (KIND=DP) :: f_in(nxx), f_out(*)
  INTEGER           :: proc, info
  INTEGER           :: displs(0:nproc_pool-1), recvcount(0:nproc_pool-1)
  !
  !
  !@ CALL start_clock( 'cgather' )
  !
  DO proc = 0, ( nproc_pool - 1 )
     !
     recvcount(proc) = 2 * ncplane * npp(proc+1)
     !
     IF ( proc == 0 ) THEN
        !
        displs(proc) = 0
        !
     ELSE
        !
        displs(proc) = displs(proc-1) + recvcount(proc-1)
        !
     END IF
     !
  END DO
  !
  CALL mp_barrier( intra_pool_comm )
  !
  CALL MPI_ALLGATHERV( f_in, recvcount(me_pool), MPI_REAL8, &
                       f_out, recvcount, displs, MPI_REAL8, &
                       intra_pool_comm, info )
  !
  CALL errore( 'cgather_sym', 'info<>0', info )
  !
  CALL mp_barrier( intra_image_comm )
  !
  !@ CALL stop_clock( 'cgather' )
  !
#endif
  !
  RETURN
  !
END SUBROUTINE cgather_sym
!
! ... "scatter"-like subroutines
!
!----------------------------------------------------------------------------
SUBROUTINE scatter( f_in, f_out )
  !----------------------------------------------------------------------------
  !
  ! ... scatters data from the first processor of every pool
  !
  ! ... REAL*8  f_in  = gathered variable (nrx1*nrx2*nrx3)
  ! ... REAL*8  f_out = distributed variable (nxx)
  !
#if defined (__PARA)
  !
  USE pfft,       ONLY : ncplane, npp, nxx
  USE mp_global,  ONLY : intra_pool_comm, nproc_pool, &
                         me_pool, root_pool
  USE mp,         ONLY : mp_barrier
  !@@ USE kinds, ONLY : DP
  USE parameters, ONLY : DP
  USE parallel_include
  !
  IMPLICIT NONE
  !
  REAL (KIND=DP) :: f_in(*), f_out(nxx)
  INTEGER        :: proc, info
  INTEGER        :: displs(0:nproc_pool-1), sendcount(0:nproc_pool-1)
  !
  !
  !@ CALL start_clock( 'scatter' )
  !
  DO proc = 0, ( nproc_pool - 1 )
     !
     sendcount(proc) = ncplane * npp(proc+1)
     !
     IF ( proc == 0 ) THEN
        !
        displs(proc) = 0
        !
     ELSE
        !
        displs(proc) = displs(proc-1) + sendcount(proc-1)
        !
     END IF
     !
  END DO
  !
  CALL mp_barrier( intra_pool_comm )
  !
  CALL MPI_SCATTERV( f_in, sendcount, displs, MPI_REAL8,   &
                     f_out, sendcount(me_pool), MPI_REAL8, &
                     root_pool, intra_pool_comm, info )
  !
  CALL errore( 'scatter', 'info<>0', info )
  !
  IF ( sendcount(me_pool) /= nxx ) f_out(sendcount(me_pool)+1:nxx) = 0.D0
  !
  !@ CALL stop_clock( 'scatter' )
  !
#endif
  !
  RETURN
  !
END SUBROUTINE scatter
!
!----------------------------------------------------------------------------
SUBROUTINE poolscatter( nsize, nkstot, f_in, nks, f_out )
  !----------------------------------------------------------------------------
  !
  ! ... This routine scatters a quantity ( typically the eigenvalues )
  ! ... among the pools.
  ! ... On input, f_in is required only on the first node of the first pool.
  ! ... f_in and f_out may coincide.
  ! ... Not a smart implementation!
  !
#if defined (__PARA)
  !
  !@@ USE kinds, ONLY : DP
  USE parameters, ONLY : DP
  USE mp_global,  ONLY : intra_pool_comm, inter_pool_comm, &
                         my_pool_id, npool, me_pool, root_pool
  USE para,       ONLY : kunit
  USE mp,         ONLY : mp_bcast
  !
  IMPLICIT NONE
  !
  INTEGER :: nsize, nkstot, nks
    ! first dimension of vectors f_in and f_out
    ! total number of k-points
    ! number of k-points per pool
  REAL (KIND=DP) :: f_in(nsize,nkstot), f_out(nsize,nks)
    ! input  ( contains the values for all k-points )
    ! output ( only for the k-points of mypool )
  INTEGER :: rest, nbase
    ! the rest of the integer division nkstot / npool
    ! the position in the original list
  !
  !
  ! ... copy from the first node of the first pool
  ! ... to the first node of all the other pools
  !
  IF ( me_pool == root_pool ) &
     CALL mp_bcast( f_in, root_pool, inter_pool_comm )
  !
  ! ... distribute the vector on the first node of each pool
  !
  rest = nkstot / kunit - ( nkstot / kunit / npool ) * npool
  !
  nbase = nks * my_pool_id
  !
  IF ( ( my_pool_id + 1 ) > rest ) nbase = nbase + rest * kunit
  !
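  ! ... purely as an illustration (numbers not taken from the code): with
  ! ... nkstot = 10, kunit = 1 and npool = 4 one gets rest = 2, so the
  ! ... first two pools hold nks = 3 k-points and the last two hold nks = 2;
  ! ... the slice copied below then starts at nbase+1 = 1, 4, 7, 9 for
  ! ... pools 0, 1, 2, 3 respectively.
  !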
  f_out(:,1:nks) = f_in(:,(nbase+1):(nbase+nks))
  !CALL DCOPY( nsize * nks, f_in(1,nbase+1), 1, f_out, 1 )
  !
  ! ... copy from the first node of every pool
  ! ... to the other nodes of every pool
  !
  CALL mp_bcast( f_out, root_pool, intra_pool_comm )
  !
#endif
  !
  RETURN
  !
END SUBROUTINE poolscatter
!
!----------------------------------------------------------------------------
SUBROUTINE fft_scatter1( f_in, nrx3, nxx_, f_aux, ncp_, npp_, sign )
  !----------------------------------------------------------------------------
  !
  ! ... transpose the fft grid across nodes
  !
  ! ... a) From columns to planes (sign > 0)
  !
  ! ... "columns" (or "pencil") representation:
  !
  ! ... processor "me" has ncp_(me) contiguous columns along z
  ! ... Each column has nrx3 elements for a fft of order nr3
  ! ... nrx3 can be = nr3 + 1 in order to reduce memory conflicts.
  !
  ! ... The transpose takes place in two steps:
  !
  ! ... 1) on each processor the columns are divided into slices along z
  ! ...    that are stored contiguously. On processor "me", slices for
  ! ...    processor "proc" are npp_(proc)*ncp_(me) big
  ! ... 2) all processors communicate to exchange slices
  ! ...    (all columns with z in the slice belonging to "me"
  ! ...    must be received, all the others must be sent to "proc")
  !
  ! ... Finally one gets the "planes" representation:
  !
  ! ... processor "me" has npp_(me) complete xy planes
  !
  ! ... b) From planes to columns (sign < 0)
  !
  ! ... Quite the same in the opposite direction
  !
  ! ... The output is overwritten on f_in ; f_aux is used as work space
  !
#if defined (__PARA)
  !
  USE para_const, ONLY : maxproc
  USE mp_global,  ONLY : intra_pool_comm, nproc_pool, me_pool
  USE mp,         ONLY : mp_barrier
  USE parameters, ONLY : DP
  USE parallel_include
  !
  IMPLICIT NONE
  !
  INTEGER        :: nrx3, nxx_, sign, ncp_(maxproc), npp_(maxproc)
  REAL (KIND=DP) :: f_in(2*nxx_), f_aux(2*nxx_)
  INTEGER        :: dest, from, k, proc, ierr
  INTEGER        :: offset1(0:maxproc-1), sendcount(0:maxproc-1), &
                    sdispls(0:maxproc-1), recvcount(0:maxproc-1), &
                    rdispls(0:maxproc-1)
  !
  !
  IF ( nproc_pool == 1 ) RETURN
  !
  !@ CALL start_clock( 'fft_scatter' )
  !
  ! ... sendcount(proc): amount of data processor "me" must send to processor proc
  ! ... recvcount(proc): amount of data processor "me" must receive from proc
  !
  DO proc = 0, ( nproc_pool - 1 )
     !
     sendcount(proc) = 2 * npp_(proc+1) * ncp_(me_pool+1)
     recvcount(proc) = 2 * npp_(me_pool+1) * ncp_(proc+1)
     !
  END DO
  !
  ! ... offset1(proc)   is used to locate the slices to be sent to proc
  ! ... sdispls(proc)+1 is the beginning of data that must be sent to proc
  ! ... rdispls(proc)+1 is the beginning of data that must be received from proc
  !
  offset1(0) = 1
  sdispls(0) = 0
  rdispls(0) = 0
  !
  DO proc = 1, ( nproc_pool - 1 )
     !
     offset1(proc) = offset1(proc-1) + 2 * npp_(proc)
     sdispls(proc) = sdispls(proc-1) + sendcount(proc-1)
     rdispls(proc) = rdispls(proc-1) + recvcount(proc-1)
     !
  END DO
  !
  ierr = 0
  !
  IF ( sign > 0 ) THEN
     !
     ! ... "forward" scatter from columns to planes
     !
     ! ... step one: store contiguously the slices
     !
     DO proc = 0, ( nproc_pool - 1 )
        !
        from = offset1(proc)
        dest = 1 + sdispls(proc)
        !
        DO k = 1, ncp_(me_pool+1)
           !
           CALL DCOPY( 2*npp_(proc+1), f_in(from+2*(k-1)*nrx3), &
                       1, f_aux(dest+2*(k-1)*npp_(proc+1)), 1 )
           !
        END DO
        !
     END DO
     !
     ! ... maybe useless; ensures that no garbage is present in the output
     !
     f_in(:) = 0.D0
     !
     ! ... step two: communication
     !
     CALL mp_barrier( intra_pool_comm )
     !
     CALL MPI_ALLTOALLV( f_aux, sendcount, sdispls, MPI_REAL8, &
                         f_in, recvcount, rdispls, MPI_REAL8,  &
                         intra_pool_comm, ierr )
     !
     CALL errore( 'fft_scatter', 'info<>0', ierr )
     !
  ELSE
     !
"backward" scatter from planes to columns ! ! ... step two: communication ! CALL mp_barrier( intra_pool_comm ) ! CALL MPI_ALLTOALLV( f_in, recvcount, rdispls, MPI_REAL8, & f_aux, sendcount, sdispls, MPI_REAL8, & intra_pool_comm, ierr ) ! CALL errore( 'fft_scatter', 'info<>0', ierr ) ! ! ... step one: store contiguously the columns ! f_in(:) = ( 0.D0, 0.D0 ) ! DO proc = 0,( nproc_pool - 1 ) ! from = 1 + sdispls(proc) ! dest = offset1(proc) ! DO k = 1, ncp_(me_pool+1) ! CALL DCOPY( 2*npp_(proc+1), f_aux(from+2*(k-1)*npp_(proc+1)), & 1, f_in (dest+2*(k-1)*nrx3), 1 ) ! END DO ! END DO ! END IF ! !@ CALL stop_clock( 'fft_scatter' ) ! #endif ! RETURN ! END SUBROUTINE fft_scatter1 ! ! ... other parallel subroutines ! !------------------------------------------------------------------------ SUBROUTINE poolextreme( ps, iflag ) !----------------------------------------------------------------------- ! ! ... Finds the maximum (iflag.gt.0) or the minimum (iflag.le.0) value o ! ... a real variable among the values distributed across the pools and ! ... returns this value to all pools. ! #if defined (__PARA) ! USE mp_global, ONLY : inter_pool_comm, intra_image_comm, npool USE mp, ONLY : mp_barrier USE parallel_include ! IMPLICIT NONE ! INTEGER :: iflag, info REAL (KIND=DP) :: ps, psr ! ! IF ( npool <= 1 ) RETURN ! CALL mp_barrier( intra_image_comm ) ! IF ( iflag > 0 ) THEN ! CALL MPI_ALLREDUCE( ps, psr, 1, MPI_REAL8, MPI_MAX, & inter_pool_comm, info ) ! CALL errore( 'poolextreme', 'info<>0 in allreduce1', info ) ! ELSE ! CALL MPI_ALLREDUCE( ps, psr, 1, MPI_REAL8, MPI_MIN, & inter_pool_comm, info ) ! CALL errore( 'poolextreme', 'info<>0 in allreduce2', info ) ! END IF ! ps = psr ! #endif ! RETURN ! END SUBROUTINE poolextreme ! !----------------------------------------------------------------------- SUBROUTINE poolrecover( vec, length, nkstot, nks ) !----------------------------------------------------------------------- ! ! ... recovers on the first processor of the first pool a ! ... distributed vector ! #if defined (__PARA) ! USE mp_global, ONLY : inter_pool_comm, intra_image_comm, & npool, me_pool, root_pool, my_pool_id USE para, ONLY : kunit USE mp, ONLY : mp_barrier USE parallel_include ! IMPLICIT NONE ! INTEGER :: status(MPI_STATUS_SIZE) INTEGER :: length, i, nks1, rest, fine, nbase, info, nks, nkstot REAL (KIND=DP) :: vec(length,nkstot) ! ! IF ( npool <= 1 ) RETURN ! IF ( MOD( nkstot, kunit ) /= 0 ) & CALL errore( 'poolrecover', 'nkstot/kunit is not an integer', nkstot ) ! nks1 = kunit * ( nkstot / kunit / npool ) ! rest = ( nkstot - nks1 * npool ) / kunit ! CALL mp_barrier( intra_image_comm ) ! IF ( me_pool == root_pool .AND. my_pool_id > 0 ) THEN ! CALL MPI_SEND( vec, (length*nks), MPI_REAL8, 0, 17, & inter_pool_comm, info ) ! CALL errore( 'poolrecover', 'info<>0 in send', info ) ! END IF ! DO i = 2, npool ! IF ( i <= rest ) THEN ! fine = nks1 + kunit ! nbase = ( nks1 + kunit ) * ( i - 1 ) ! ELSE ! fine = nks1 ! nbase = rest * (nks1 + kunit) + (i - 1 - rest) * nks1 ! END IF ! IF ( me_pool == root_pool .AND. my_pool_id == 0 ) THEN ! CALL MPI_RECV( vec(1,nbase+1), (length*fine), MPI_REAL8, & (i-1), 17, inter_pool_comm, status, info ) ! CALL errore( 'poolrecover', 'info<>0 in recv', info ) ! END IF ! END DO ! #endif ! RETURN ! END SUBROUTINE poolrecover ! !------------------------------------------------------------------------ SUBROUTINE ipoolrecover( ivec, length, nkstot, nks ) !------------------------------------------------------------------------ ! ! ... as above, for an integer vector ! 
#if defined (__PARA)
  !
  USE mp_global, ONLY : inter_pool_comm, intra_image_comm, &
                        npool, me_pool, root_pool, my_pool_id
  USE para,      ONLY : kunit
  USE mp,        ONLY : mp_barrier
  USE parallel_include
  !
  IMPLICIT NONE
  !
  INTEGER :: status(MPI_STATUS_SIZE)
  INTEGER :: length, i, nks1, rest, fine, nbase, info, nks, nkstot
  INTEGER :: ivec(length,nkstot)
  !
  !
  IF ( npool <= 1 ) RETURN
  !
  IF ( MOD( nkstot, kunit ) /= 0 ) &
     CALL errore( 'ipoolrecover', 'nkstot/kunit is not an integer', nkstot )
  !
  nks1 = kunit * ( nkstot / kunit / npool )
  !
  rest = ( nkstot - nks1 * npool ) / kunit
  !
  CALL mp_barrier( intra_image_comm )
  !
  IF ( me_pool == root_pool .AND. my_pool_id > 0 ) THEN
     !
     CALL MPI_SEND( ivec, (length*nks), MPI_INTEGER, 0, 17, &
                    inter_pool_comm, info )
     !
     CALL errore( 'ipoolrecover', 'info<>0 in send', info )
     !
  END IF
  !
  DO i = 2, npool
     !
     IF ( i <= rest ) THEN
        !
        fine = nks1 + kunit
        !
        nbase = ( nks1 + kunit ) * ( i - 1 )
        !
     ELSE
        !
        fine = nks1
        !
        nbase = rest * ( nks1 + kunit ) + ( i - 1 - rest ) * nks1
        !
     END IF
     !
     IF ( me_pool == root_pool .AND. my_pool_id == 0 ) THEN
        !
        CALL MPI_RECV( ivec(1,nbase+1), (length*fine), MPI_INTEGER, &
                       (i-1), 17, inter_pool_comm, status, info )
        !
        CALL errore( 'ipoolrecover', 'info<>0 in recv', info )
        !
     END IF
     !
  END DO
  !
#endif
  !
  RETURN
  !
END SUBROUTINE ipoolrecover
!
!-----------------------------------------------------------------------
SUBROUTINE extreme( ps, iflag )
  !-----------------------------------------------------------------------
  !
  ! ... Finds the maximum (iflag.gt.0) or the minimum (iflag.le.0) value
  ! ... of a real variable among the values distributed across the
  ! ... processors of the image
  !
#if defined (__PARA)
  !
  USE mp_global,  ONLY : intra_image_comm
  USE mp,         ONLY : mp_barrier
  USE parameters, ONLY : DP
  USE parallel_include
  !
  IMPLICIT NONE
  !
  REAL (KIND=DP) :: ps, psr
  INTEGER        :: iflag, info
  !
  !
  CALL mp_barrier( intra_image_comm )
  !
  IF ( iflag > 0 ) THEN
     !
     CALL MPI_ALLREDUCE( ps, psr, 1, MPI_REAL8, MPI_MAX, &
                         intra_image_comm, info )
     !
  ELSE
     !
     CALL MPI_ALLREDUCE( ps, psr, 1, MPI_REAL8, MPI_MIN, &
                         intra_image_comm, info )
     !
  END IF
  !
  ps = psr
  !
#endif
  !
  RETURN
  !
END SUBROUTINE extreme
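!
! ... typical calling pattern of the "reduce"-like wrappers above
! ... (illustration only; the array name and length below are placeholders,
! ... not symbols defined in this file):
! ...
! ...    REAL (KIND=DP) :: work(3)
! ...    CALL reduce( 3, work )        ! sum over the processors of a pool
! ...    CALL poolreduce( 3, work )    ! then sum over the pools
!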