! ! Copyright (C) 2002 FPMD group ! This file is distributed under the terms of the ! GNU General Public License. See the file `License' ! in the root directory of the present distribution, ! or http://www.gnu.org/copyleft/gpl.txt . ! !------------------------------------------------------------------------------! ! Carlo Cavazzoni ! Last update 19 May 2001 !------------------------------------------------------------------------------! !------------------------------------------------------------------------------! ! MODULE mp_buffers ! This module is used to implement, when possible, high efficient buffered ! communications among processors. ! In particular two buffers are defined: ! ! mp_snd_buffer Send buffer ! ! mp_rcv_buffer Receive buffer ! ! together with the buffers the module contains initialization and ! communication functions, that may depend on the particular hardware ! !------------------------------------------------------------------------------! USE io_global, ONLY : stdout USE kinds, ONLY : dbl USE parallel_include #if defined __SHMEM USE shmem_include #endif PRIVATE PUBLIC :: mp_sendrecv_buffers, mp_allocate_buffers, mp_deallocate_buffers, & mp_barrier_buffers, mp_alltoall_buffers,mp_excng, mp_sum_buffers, mp_report_buffers SAVE #if defined COMPLEX_MESSAGE_MAX_SIZE INTEGER, PARAMETER :: mp_bufsize_msgmax = COMPLEX_MESSAGE_MAX_SIZE #else INTEGER, PARAMETER :: mp_bufsize_msgmax = 2**20 ! 1Mb 2^20 #endif #if defined __SHMEM #if defined __ALTIX || defined __ORIGIN COMPLEX (dbl) :: mp_snd_buffer(mp_bufsize_msgmax), & & mp_rcv_buffer(mp_bufsize_msgmax) POINTER (mp_p_snd_buffer,mp_snd_buffer), & & (mp_p_rcv_buffer,mp_rcv_buffer) LOGICAL, SAVE :: first #else pointer (mp_p_snd_buffer,mp_snd_buffer) pointer (mp_p_rcv_buffer,mp_rcv_buffer) complex (dbl) :: mp_snd_buffer(1) complex (dbl) :: mp_rcv_buffer(1) #endif #else integer :: mp_p_snd_buffer = 0 integer :: mp_p_rcv_buffer = 0 complex (dbl), allocatable :: mp_snd_buffer(:) complex (dbl), allocatable :: mp_rcv_buffer(:) #endif PUBLIC :: mp_snd_buffer, mp_rcv_buffer, & mp_p_snd_buffer, mp_p_rcv_buffer integer :: mp_bufsize integer :: mp_high_watermark = 0 PUBLIC :: mp_bufsize_msgmax !------------------------------------------------------------------------------! ! CONTAINS ! !------------------------------------------------------------------------------! !..mp_allocate_buffers !..Carlo Cavazzoni SUBROUTINE mp_allocate_buffers(bufsize) IMPLICIT NONE INTEGER, INTENT(IN) :: bufsize INTEGER :: ERR #if (defined __SHMEM && defined __ALTIX) || (defined __SHMEM && defined __ORIGIN) IF (bufsize .GT. mp_bufsize_msgmax) THEN CALL errore(' mp_allocate_buffers ', & ' mp_bufsize_msgmax too small, need ', bufsize) END IF IF (first .NE. .TRUE.) THEN CALL shpalloc(mp_p_snd_buffer, 2 * mp_bufsize_msgmax, err, & & -1) IF (err .NE. 0) CALL errore(' mp_allocate_buffers ', & & ' allocating mp_snd_buffer ',err) CALL shpalloc(mp_p_rcv_buffer, 2 * mp_bufsize_msgmax, err, & & -1) IF (err .NE. 0) CALL errore(' mp_allocate_buffers ', & & ' allocating mp_rcv_buffer ',err) first = .TRUE. END IF #else #if defined __SHMEM CALL SHPALLOC(mp_p_snd_buffer,2*bufsize, err, 0) IF(ERR /= 0) CALL errore(' mp_allocate_buffers ', ' allocating mp_rcv_buffer ',err) #else ALLOCATE(mp_snd_buffer(bufsize), STAT = err) #endif IF(ERR /= 0) CALL errore(' mp_allocate_buffers ', ' allocating mp_snd_buffer ',err) #if defined __SHMEM CALL SHPALLOC(mp_p_rcv_buffer,2*bufsize, err, 0) #else ALLOCATE(mp_rcv_buffer(bufsize), STAT = err) #endif IF(ERR /= 0) CALL errore(' mp_allocate_buffers ', ' allocating mp_rcv_buffer ',err) #endif /* (defined __SHMEM && defined __ALTIX) || (defined __SHMEM && defined __ORIGIN) */ mp_bufsize = bufsize RETURN END SUBROUTINE mp_allocate_buffers ! !------------------------------------------------------------------------------! !..mp_deallocate_buffers !..Carlo Cavazzoni SUBROUTINE mp_deallocate_buffers IMPLICIT NONE integer err #if (defined __SHMEM && defined __ALTIX) || (defined __SHMEM && defined __ORIGIN) ! ! nothing ! #else #if defined __SHMEM CALL shmem_barrier_all CALL SHPDEALLC(mp_p_rcv_buffer, err, 0) #else DEALLOCATE(mp_snd_buffer, STAT = err) #endif IF(ERR /= 0) CALL errore(' mp_deallocate_buffers ', ' deallocating mp_rcv_buffer ',err) #if defined __SHMEM #else DEALLOCATE(mp_rcv_buffer, STAT = err) #endif IF(ERR /= 0) CALL errore(' mp_deallocate_buffers ', ' deallocating mp_snd_buffer ',err) #endif /* (defined __SHMEM && defined __ALTIX) || (defined __SHMEM && defined __ORIGIN) */ RETURN END SUBROUTINE mp_deallocate_buffers ! !------------------------------------------------------------------------------! !..mp_barrier_buffers !..Carlo Cavazzoni SUBROUTINE mp_barrier_buffers IMPLICIT NONE #if defined __SHMEM call shmem_barrier_all #else #endif RETURN END SUBROUTINE mp_barrier_buffers ! !------------------------------------------------------------------------------! !..mp_sum_buffers !..Carlo Cavazzoni SUBROUTINE mp_sum_buffers IMPLICIT NONE INTEGER ierr, pwrksize #if defined __PARA # if defined __SHMEM pointer (p_pWrk,pWrk) REAL(dbl) pWrk(1) INTEGER :: nproc, num_pes nproc = num_pes() pwrksize = MAX(2*mp_bufsize,SHMEM_REDUCE_MIN_WRKDATA_SIZE) CALL SHPALLOC(p_pWrk, pwrksize, ierr, 0) IF(IERR /= 0) THEN CALL errore(' mp_sum_buffers ', ' allocating p_pWrk ',ierr) END IF call shmem_barrier_all CALL SHMEM_REAL8_SUM_TO_ALL(mp_rcv_buffer, mp_snd_buffer, & 2*mp_bufsize, 0, 0, nproc, pWrk, pSync_sta) call shmem_barrier_all CALL SHPDEALLC(p_pwrk, ierr, 0) IF(IERR /= 0) call errore(' mp_sum_buffers ', & ' deallocating p_pWrk ',ierr) # elif defined __MPI CALL MPI_ALLREDUCE( mp_snd_buffer(1), mp_rcv_buffer(1), mp_bufsize, & MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD, IERR) IF(IERR /= 0) call errore(' mp_sum_buffers ', ' mpi_allreduce ',ierr) # endif #else call ZCOPY(mp_bufsize,mp_snd_buffer(1),1,mp_rcv_buffer(1),1) #endif mp_high_watermark = MAX( mp_high_watermark, 16 * mp_bufsize ) return END SUBROUTINE mp_sum_buffers ! !------------------------------------------------------------------------------! !..mp_alltoall_buffers !..Carlo Cavazzoni SUBROUTINE mp_alltoall_buffers(mp_snd_buffer, mp_rcv_buffer) IMPLICIT NONE COMPLEX(dbl) :: mp_snd_buffer(:) COMPLEX(dbl) :: mp_rcv_buffer(:) INTEGER :: ierr, nproc, i INTEGER :: msg_size #if defined __PARA # if defined __SHMEM integer ip, isour, mpime integer my_pe, num_pes call shmem_barrier_all mpime = my_pe() nproc = num_pes() msg_size = mp_bufsize/nproc IF( (msg_size + 1) > mp_bufsize_msgmax ) THEN CALL errore(' mp_alltoall_buffers ', ' bufsize too large ', msg_size) END IF do ip =1,nproc ISOUR = MOD(MPIME-IP+NPROC,NPROC) call shmem_get64(mp_rcv_buffer( 1 + isour*msg_size), & mp_snd_buffer(1+mpime*msg_size), msg_size*2, isour) end do # elif defined __MPI CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr) msg_size = mp_bufsize/nproc IF( (msg_size + 1) > mp_bufsize_msgmax ) THEN CALL errore(' mp_alltoall_buffers ', ' bufsize too large ', msg_size) END IF !WRITE( stdout,*) ' MP_BUFFERS DEBUG ', msg_size !WRITE( stdout,*) ' MP_BUFFERS DEBUG ', mp_snd_buffer(1) !WRITE( stdout,*) ' MP_BUFFERS DEBUG ', mp_snd_buffer(1+msg_size) call MPI_ALLTOALL(mp_snd_buffer(1),msg_size,MPI_DOUBLE_COMPLEX, & mp_rcv_buffer(1),msg_size,MPI_DOUBLE_COMPLEX, & MPI_COMM_WORLD,IERR) !WRITE( stdout, fmt='(10D8.2)' ) mp_rcv_buffer(1:mp_bufsize) !WRITE( stdout,*) ' MP_BUFFERS DEBUG ', mp_rcv_buffer(1) !WRITE( stdout,*) ' MP_BUFFERS DEBUG ', mp_rcv_buffer(1+msg_size) IF(IERR /= 0) call errore(' mp_alltoall_buffers ', ' mpi_alltoall ',ierr) # endif #else msg_size = mp_bufsize CALL ZCOPY(msg_size, mp_snd_buffer(1), 1, mp_rcv_buffer(1), 1) #endif mp_high_watermark = MAX( mp_high_watermark, 16 * msg_size ) return END SUBROUTINE mp_alltoall_buffers ! !------------------------------------------------------------------------------! !..mp_sendrecv_buffers !..Carlo Cavazzoni SUBROUTINE mp_sendrecv_buffers(isour, idest, ip) IMPLICIT NONE INTEGER, INTENT(IN) :: isour, idest, ip #if defined __PARA # if defined __MPI INTEGER :: istatus(MPI_STATUS_SIZE) INTEGER :: ierr # endif # if defined __SHMEM call shmem_barrier_all call shmem_get64(mp_rcv_buffer, mp_snd_buffer, mp_bufsize*2, isour-1) # elif defined __MPI CALL MPI_SENDRECV(mp_snd_buffer(1), mp_bufsize, MPI_DOUBLE_COMPLEX, & IDEST-1, ip, mp_rcv_buffer(1), mp_bufsize, MPI_DOUBLE_COMPLEX, & ISOUR-1, ip, MPI_COMM_WORLD, ISTATUS, ierr) IF(ierr /= 0) call errore(' mp_sendrecv_buffers ', ' MPI_SENDRECV ', ierr) # endif #else CALL ZCOPY(mp_bufsize, mp_snd_buffer(1), 1, mp_rcv_buffer(1), 1) #endif mp_high_watermark = MAX( mp_high_watermark, 16 * mp_bufsize ) RETURN END SUBROUTINE mp_sendrecv_buffers SUBROUTINE mp_report_buffers WRITE( stdout, *) WRITE( stdout, *) ' mp_buffers: high_watermark (bytes): ', mp_high_watermark RETURN END SUBROUTINE mp_report_buffers END MODULE mp_buffers