! ! Copyright (C) 2002 FPMD group ! This file is distributed under the terms of the ! GNU General Public License. See the file `License' ! in the root directory of the present distribution, ! or http://www.gnu.org/copyleft/gpl.txt . ! ! ! Copyright (C) 2001-2003 PWSCF group ! This file is distributed under the terms of the ! GNU General Public License. See the file `License' ! in the root directory of the present distribution, ! or http://www.gnu.org/copyleft/gpl.txt . ! !----------------------------------------------------------------------! ! FFT scalar drivers Module. ! Written by Carlo Cavazzoni ! Last update January 2004 !----------------------------------------------------------------------! #if defined __HPM # include "/cineca/prod/hpm/include/f_hpm.h" #endif #include "f_defs.h" !=----------------------------------------------------------------------=! MODULE fft_scalar !=----------------------------------------------------------------------=! USE kinds IMPLICIT NONE SAVE PRIVATE PUBLIC :: cft_1z, cft_2xy, cft_b, cfft3d, cfft3ds PUBLIC :: good_fft_dimension, allowed, good_fft_order ! ... Local Parameter INTEGER, PARAMETER :: ndims = 3 ! ndims Number of different FFT tables that the module ! could keep into memory without reinitialization #if defined __AIX INTEGER, PARAMETER :: nfftx = 2049 INTEGER, PARAMETER :: lwork = 20000 + ( 2 * nfftx + 256 ) * 64 + 3 * nfftx INTEGER, PARAMETER :: ltabl = 20000 + 3 * nfftx ! see the ESSL manual ( DCFT ) for the workspace and table lenght formulas #elif defined __SCSL INTEGER, PARAMETER :: nfftx = 1025 INTEGER, PARAMETER :: lwork = 2 * nfftx INTEGER, PARAMETER :: ltabl = 2 * nfftx + 256 #else INTEGER, PARAMETER :: nfftx = 1025 INTEGER, PARAMETER :: lwork = 20 * nfftx INTEGER, PARAMETER :: ltabl = 4 * nfftx #endif ! C_POINTER is defined in include/f_defs.h ! for 32bit executables, C_POINTER is integer(4) ! for 64bit executables, C_POINTER is integer(8) #if defined __FFTW C_POINTER :: fw_plan( 3, ndims ) = 0 C_POINTER :: bw_plan( 3, ndims ) = 0 ! Pointers to the "C" structures containing FFT factors ( PLAN ) #elif defined __AIX REAL (dbl) :: fw_table( ltabl, 3, ndims ) REAL (dbl) :: bw_table( ltabl, 3, ndims ) REAL (dbl) :: work( lwork ) ! Array containing FFT factors + workspace #elif defined __COMPLIB REAL (dbl) :: work(lwork) REAL (dbl) :: tablez(ltabl,ndims) REAL (dbl) :: tablex(ltabl,ndims) REAL (dbl) :: tabley(ltabl,ndims) ! Array containing FFT factors + workspace #elif defined __SCSL COMPLEX (dbl) :: work(lwork) REAL (dbl) :: tablez(ltabl,ndims) REAL (dbl) :: tablex(ltabl,ndims) REAL (dbl) :: tabley(ltabl,ndims) REAL (dbl) :: DUMMY INTEGER :: isys(0:1) ! Array containing FFT factors + workspace #endif REAL (dbl) :: scale !=----------------------------------------------------------------------=! CONTAINS !=----------------------------------------------------------------------=! ! !=----------------------------------------------------------------------=! ! ! ! ! FFT along "z" ! ! ! !=----------------------------------------------------------------------=! ! SUBROUTINE cft_1z(c, nsl, nz, ldc, sgn, cout) ! driver routine for m 1d complex fft's ! nx=n+1 is allowed (in order to avoid memory conflicts) ! A separate initialization is stored each combination of input sizes ! NOTA BENE: the output in fout ! INTEGER, INTENT(IN) :: sgn INTEGER, INTENT(IN) :: nsl, nz, ldc COMPLEX (dbl) :: c(:), cout(:) REAL(dbl) :: tscale INTEGER :: i, j, err, idir, ip, isign INTEGER, SAVE :: zdims( 3, ndims ) = -1 INTEGER, SAVE :: icurrent = 1 #if defined __HPM CALL f_hpmstart( 30 + ABS(sgn), 'cft_1z' ) #endif IF( nsl < 0 ) THEN CALL errore(" fft_scalar: cft_1 ", " nsl out of range ", nsl) END IF #if defined __SCSL isys(0) = 1 #endif isign = -sgn ! ! Here initialize table only if necessary ! ip = -1 DO i = 1, ndims ! first check if there is already a table initialized ! for this combination of parameters IF( ( nz == zdims(1,i) ) .and. ( nsl == zdims(2,i) ) .and. ( ldc == zdims(3,i) ) ) THEN ip = i EXIT END IF END DO IF( ip == -1 ) THEN ! no table exist for these parameters ! initialize a new one ! WRITE( stdout, fmt="('DEBUG cft_1z, reinitializing tables ', I3)" ) icurrent #if defined __FFTW IF( fw_plan( 3, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( fw_plan( 3, icurrent) ) IF( bw_plan( 3, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( bw_plan( 3, icurrent) ) idir = -1; CALL CREATE_PLAN_1D( fw_plan( 3, icurrent), nz, idir) idir = 1; CALL CREATE_PLAN_1D( bw_plan( 3, icurrent), nz, idir) #elif defined __COMPLIB CALL ZFFT1DI( nz, tablez(1,icurrent) ) #elif defined __SCSL CALL ZZFFTM (0, nz, 0, 0.0D0, DUMMY, 1, DUMMY, 1, & tablez(1,icurrent), DUMMY, isys) #elif defined __AIX tscale = 1.0d0 / nz CALL DCFT ( 1, c(1), 1, ldc, cout(1), 1, ldc, nz, nsl, 1, & tscale, fw_table(1, 3, icurrent), ltabl, work(1), lwork) CALL DCFT ( 1, c(1), 1, ldc, cout(1), 1, ldc, nz, nsl, -1, & 1.0d0, bw_table(1, 3, icurrent), ltabl, work(1), lwork) #else CALL errore(' cft_1 ',' no scalar fft driver specified ', 1) #endif zdims(1,icurrent) = nz; zdims(2,icurrent) = nsl; zdims(3,icurrent) = ldc; ip = icurrent icurrent = MOD( icurrent, ndims ) + 1 END IF ! ! Now perform the FFTs using machine specific drivers ! #if defined __FFTW IF (isign > 0) THEN tscale = 1.0d0 / nz CALL FFT_Z_STICK(fw_plan( 3, ip), c(1), ldc, nsl) CALL ZDSCAL( ldc * nsl, tscale, c(1), 1) ELSE IF (isign < 0) THEN CALL FFT_Z_STICK(bw_plan( 3, ip), c(1), ldc, nsl) END IF #elif defined __COMPLIB IF (isign /= 0) THEN IF( isign < 0 ) idir = +1 IF( isign > 0 ) idir = -1 CALL zfftm1d( idir, nz, nsl, c(1), 1, ldc, tablez(1,ip) ) IF (isign > 0) THEN tscale = 1.0d0 / nz CALL ZDSCAL( ldc * nsl, tscale, c(1), 1) END IF END IF #elif defined __SCSL IF ( isign /= 0 ) THEN IF ( isign > 0 ) THEN idir = -1 tscale = 1.0d0 / nz ELSE IF ( isign < 0 ) THEN idir = 1 tscale = 1.0d0 END IF CALL ZZFFTM (idir, nz, nsl, tscale, c(1), ldc, cout(1), ldc, & tablez(1,ip), work, isys) END IF #elif defined __AIX IF( isign > 0 ) THEN tscale = 1.0d0 / nz idir = 1 CALL DCFT (0, c(1), 1, ldc, cout(1), 1, ldc, nz, nsl, idir, & tscale, fw_table(1, 3, ip), ltabl, work, lwork) ELSE IF( isign < 0 ) THEN idir = -1 tscale = 1.0d0 CALL DCFT (0, c(1), 1, ldc, cout(1), 1, ldc, nz, nsl, idir, & tscale, bw_table(1, 3, ip), ltabl, work, lwork) END IF #else CALL errore(' cft_1 ',' no scalar fft driver specified ', 1) #endif #if defined __FFTW || defined __COMPLIB cout( 1 : ldc * nsl ) = c( 1 : ldc * nsl ) #endif #if defined __HPM CALL f_hpmstop( 30 + ABS(sgn) ) #endif RETURN END SUBROUTINE cft_1z ! ! !=----------------------------------------------------------------------=! ! ! ! ! FFT along "x" and "y" direction ! ! ! !=----------------------------------------------------------------------=! ! ! SUBROUTINE cft_2xy(r, nzl, nx, ny, ldx, ldy, sgn, pl2ix) ! driver routine for nzl 2d complex fft's of lengths nx and ny ! (sparse grid, both charge and wavefunctions) ! on input, sgn=+/-1 for charge density, sgn=+/-2 for wavefunctions ! ldx is the actual dimension of f (may differ from n) ! for compatibility: ldy is not used ! A separate initialization is stored for each combination of input parameters IMPLICIT NONE INTEGER, INTENT(IN) :: sgn, ldx, ldy, nx, ny, nzl INTEGER, OPTIONAL, INTENT(IN) :: pl2ix(:) COMPLEX (dbl) :: r( : ) INTEGER :: i, k, j, err, idir, ip, isign, kk REAL(dbl) :: tscale INTEGER, SAVE :: icurrent = 1 INTEGER, SAVE :: dims( 4, ndims) = -1 LOGICAL :: dofft( nfftx ) INTEGER, PARAMETER :: stdout = 6 #if defined __SCSL COMPLEX(dbl) :: XY(nx+nx*ny) #endif #if defined __HPM CALL f_hpmstart( 40 + ABS(sgn), 'cft_2xy' ) #endif #if defined __SCSL isys(0) = 1 #endif isign = - sgn dofft( 1 : nx ) = .TRUE. IF( PRESENT( pl2ix ) ) THEN IF( SIZE( pl2ix ) < nx ) & CALL errore( ' cft_2xy ', ' wrong dimension for arg no. 8 ', 1 ) DO i = 1, nx IF( pl2ix(i) < 1 ) dofft( i ) = .FALSE. END DO END IF ! WRITE( stdout,*) 'DEBUG: ', COUNT( dofft ) ! ! Here initialize table only if necessary ! ip = -1 DO i = 1, ndims ! first check if there is already a table initialized ! for this combination of parameters IF( ( ny == dims(1,i) ) .and. ( ldx == dims(2,i) ) .and. & ( nx == dims(3,i) ) .and. ( nzl == dims(4,i) ) ) THEN ip = i EXIT END IF END DO IF( ip == -1 ) THEN ! no table exist for these parameters ! initialize a new one ! WRITE( stdout, fmt="('DEBUG cft_2xy, reinitializing tables ', I3)" ) icurrent #if defined __FFTW IF( fw_plan( 2, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( fw_plan( 2, icurrent) ) IF( bw_plan( 2, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( bw_plan( 2, icurrent) ) idir = -1; CALL CREATE_PLAN_1D( fw_plan( 2, icurrent), ny, idir) idir = 1; CALL CREATE_PLAN_1D( bw_plan( 2, icurrent), ny, idir) IF( fw_plan( 1, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( fw_plan( 1, icurrent) ) IF( bw_plan( 1, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( bw_plan( 1, icurrent) ) idir = -1; CALL CREATE_PLAN_1D( fw_plan( 1, icurrent), nx, idir) idir = 1; CALL CREATE_PLAN_1D( bw_plan( 1, icurrent), nx, idir) #elif defined __AIX tscale = 1.0d0 / ( nx * ny ) CALL DCFT ( 1, r(1), ldx, 1, r(1), ldx, 1, ny, 1, 1, 1.0d0, & fw_table( 1, 2, icurrent), ltabl, work(1), lwork ) CALL DCFT ( 1, r(1), ldx, 1, r(1), ldx, 1, ny, 1, -1, 1.0d0, & bw_table(1, 2, icurrent), ltabl, work(1), lwork ) CALL DCFT ( 1, r(1), 1, ldx, r(1), 1, ldx, nx, ny, 1, & tscale, fw_table( 1, 1, icurrent), ltabl, work(1), lwork) CALL DCFT ( 1, r(1), 1, ldx, r(1), 1, ldx, nx, ny, -1, & 1.0d0, bw_table(1, 1, icurrent), ltabl, work(1), lwork) #elif defined __COMPLIB CALL ZFFT1DI( ny, tabley(1, icurrent) ) CALL ZFFT1DI( nx, tablex(1, icurrent) ) #elif defined __SCSL CALL ZZFFTMR (0, ny, 0, 0.0D0, DUMMY, 1, DUMMY, 1, & tabley(1, icurrent), DUMMY, isys) CALL ZZFFTM (0, nx, 0, 0.0D0, DUMMY, 1, DUMMY, 1, & tablex(1, icurrent), DUMMY, isys) #else CALL errore(' fft_y ',' no scalar fft driver specified ', 1) #endif dims(1,icurrent) = ny; dims(2,icurrent) = ldx; dims(3,icurrent) = nx; dims(4,icurrent) = nzl; ip = icurrent icurrent = MOD( icurrent, ndims ) + 1 END IF ! ! Now perform the FFTs using machine specific drivers ! #if defined __FFTW IF( isign > 0 ) THEN CALL FFT_X_STICK( fw_plan( 1, ip), r(1), nx, ny, nzl, ldx, ldy ) do i = 1, nx do k = 1, nzl IF( dofft( i ) ) THEN j = i + ldx*ldy * ( k - 1 ) call FFT_Y_STICK(fw_plan( 2, ip), r(j), ny, ldx) END IF end do end do tscale = 1.0d0 / ( nx * ny ) CALL ZDSCAL( ldx * ldy * nzl, tscale, r(1), 1) ELSE IF( isign < 0 ) THEN do i = 1, nx do k = 1, nzl IF( dofft( i ) ) THEN j = i + ldx*ldy * ( k - 1 ) call FFT_Y_STICK( bw_plan( 2, ip), r(j), ny, ldx) END IF end do end do CALL FFT_X_STICK( bw_plan( 1, ip), r(1), nx, ny, nzl, ldx, ldy ) END IF #elif defined __AIX IF( isign > 0 ) THEN idir = 1 tscale = 1.0d0 / ( nx * ny ) do k = 1, nzl kk = 1 + ( k - 1 ) * ldx * ldy CALL DCFT ( 0, r(kk), 1, ldx, r(kk), 1, ldx, nx, ny, idir, & tscale, fw_table( 1, 1, ip ), ltabl, work( 1 ), lwork) do i = 1, nx IF( dofft( i ) ) THEN kk = i + ( k - 1 ) * ldx * ldy call DCFT ( 0, r( kk ), ldx, 1, r( kk ), ldx, 1, ny, 1, & idir, 1.0d0, fw_table(1, 2, ip), ltabl, work( 1 ), lwork) END IF end do end do ELSE IF( isign < 0 ) THEN idir = -1 DO k = 1, nzl do i = 1, nx IF( dofft( i ) ) THEN kk = i + ( k - 1 ) * ldx * ldy call DCFT ( 0, r( kk ), ldx, 1, r( kk ), ldx, 1, ny, 1, & idir, 1.0d0, bw_table(1, 2, ip), ltabl, work( 1 ), lwork) END IF end do kk = 1 + ( k - 1 ) * ldx * ldy CALL DCFT ( 0, r( kk ), 1, ldx, r( kk ), 1, ldx, nx, ny, idir, & 1.0d0, bw_table(1, 1, ip), ltabl, work( 1 ), lwork) END DO END IF #elif defined __COMPLIB IF( isign > 0 ) THEN idir = -1 DO i = 1, nzl k = 1 + ( i - 1 ) * ldx * ldy call zfftm1d( idir, nx, ny, r(k), 1, ldx, tablex(1,ip) ) END DO do i = 1, nx IF( dofft( i ) ) THEN call zfftm1d( idir, ny, nzl, r(i), ldx, ldx*ldy, tabley(1, ip) ) END IF end do tscale = 1.0d0 / ( nx * ny ) CALL ZDSCAL( ldx * ldy * nzl, tscale, r(1), 1) ELSE IF( isign < 0 ) THEN idir = 1 do i = 1, nx IF( dofft( i ) ) THEN call zfftm1d( idir, ny, nzl, r(i), ldx, ldx*ldy, tabley(1, ip) ) END IF end do DO i = 1, nzl k = 1 + ( i - 1 ) * ldx * ldy call zfftm1d( idir, nx, ny, r(k), 1, ldx, tablex(1,ip) ) END DO END IF #elif defined __SCSL IF( isign > 0 ) THEN idir = -1 tscale = 1.0d0 / (nx * ny) DO k = 0, nzl-1 kk = k * ldx * ldy ! FORWARD: ny FFTs in the X direction CALL ZZFFTM ( idir, nx, ny, tscale, r(kk+1), ldx, r(kk+1), ldx, & tablex(1, ip), work(1), isys ) ! FORWARD: nx FFTs in the Y direction DO i = 1, nx IF ( dofft(i) ) THEN !DIR$IVDEP !DIR$LOOP COUNT (50) DO j = 0, ny-1 XY(j+1) = r(i + (j) * ldx + kk) END DO CALL ZZFFT(idir, ny, 1.0D0, XY, XY, tabley(1, ip), & work(1), isys) !DIR$IVDEP !DIR$LOOP COUNT (50) DO j = 0, ny-1 r(i + (j) * ldx + kk) = XY(j+1) END DO END IF END DO END DO END IF IF ( isign < 0 ) THEN idir = 1 tscale = 1.0d0 DO k = 0, nzl-1 ! BACKWARD: nx FFTs in the Y direction kk = (k) * ldx * ldy DO i = 1, nx IF ( dofft(i) ) THEN !DIR$IVDEP !DIR$LOOP COUNT (50) DO j = 0, ny-1 XY(j+1) = r(i + (j) * ldx + kk) END DO CALL ZZFFT(idir, ny, 1.0D0, XY, XY, tabley(1, ip), & work(1), isys) !DIR$IVDEP !DIR$LOOP COUNT (50) DO j = 0, ny-1 r(i + (j) * ldx + kk) = XY(j+1) END DO END IF END DO ! BACKWARD: ny FFTs in the X direction CALL ZZFFTM ( idir, nx, ny, tscale, r(kk+1), ldx, r(kk+1), ldx, & tablex(1, ip), work(1), isys ) END DO END IF #else CALL errore(' cft_2xy ',' no scalar fft driver specified ', 1) #endif #if defined __HPM CALL f_hpmstop( 40 + ABS(sgn) ) #endif return end subroutine cft_2xy ! !=----------------------------------------------------------------------=! ! ! ! ! 3D scalar FFTs ! ! ! !=----------------------------------------------------------------------=! ! SUBROUTINE cfft3d( f, nr1, nr2, nr3, nr1x, nr2x, nr3x, sgn ) IMPLICIT NONE INTEGER, INTENT(IN) :: nr1, nr2, nr3, nr1x, nr2x, nr3x, sgn COMPLEX (dbl) :: f(:) INTEGER :: i, k, j, err, idir, ip, isign REAL(dbl) :: tscale INTEGER, SAVE :: icurrent = 1 INTEGER, SAVE :: dims(3,ndims) = -1 #if defined __FFTW C_POINTER, save :: fw_plan(ndims) = 0 C_POINTER, save :: bw_plan(ndims) = 0 #elif defined __AIX #elif defined __COMPLIB || defined __SCSL real(kind=8), save :: table( 3 * ltabl, ndims ) #endif #if defined __HPM CALL f_hpmstart( 50 + ABS(sgn), 'cfft3d' ) #endif #if defined __SCSL isys(0) = 1 #endif isign = -sgn ! ! Here initialize table only if necessary ! ip = -1 DO i = 1, ndims ! first check if there is already a table initialized ! for this combination of parameters IF ( ( nr1 == dims(1,i) ) .and. ( nr2 == dims(2,i) ) .and. ( nr3 == dims(3,i) ) ) THEN ip = i EXIT END IF END DO IF( ip == -1 ) THEN ! no table exist for these parameters ! initialize a new one #if defined __FFTW IF ( nr1 /= nr1x .or. nr2 /= nr2x .or. nr3 /= nr3x ) & call errore('cfft3','not implemented',1) IF( fw_plan(icurrent) /= 0 ) CALL DESTROY_PLAN_3D( fw_plan(icurrent) ) IF( bw_plan(icurrent) /= 0 ) CALL DESTROY_PLAN_3D( bw_plan(icurrent) ) idir = -1; CALL CREATE_PLAN_3D( fw_plan(icurrent), nr1, nr2, nr3, idir) idir = 1; CALL CREATE_PLAN_3D( bw_plan(icurrent), nr1, nr2, nr3, idir) #elif defined __AIX #elif defined __COMPLIB CALL zfft3di( nr1, nr2, nr3, table(1,icurrent) ) #elif defined __SCSL CALL zzfft3d (0, nr1, nr2, nr3, 0.0D0, DUMMY, 1, 1, DUMMY, 1, 1, & table(1, icurrent), work(1), isys) #else CALL errore(' cfft3d ',' no scalar fft driver specified ', 1) #endif dims(1,icurrent) = nr1; dims(2,icurrent) = nr2; dims(3,icurrent) = nr3 ip = icurrent icurrent = MOD( icurrent, ndims ) + 1 END IF ! ! Now perform the 3D FFT using the machine specific driver ! #if defined __FFTW IF( isign > 0 ) THEN call FFTW_INPLACE_DRV_3D( fw_plan(ip), 1, f(1), 1, 1 ) tscale = 1.0d0 / DBLE( nr1 * nr2 * nr3 ) call ZDSCAL( nr1 * nr2 * nr3, tscale, f(1), 1) ELSE IF( isign < 0 ) THEN call FFTW_INPLACE_DRV_3D( bw_plan(ip), 1, f(1), 1, 1 ) END IF #elif defined __AIX if ( isign > 0 ) then tscale = 1.0d0 / ( nr1 * nr2 * nr3 ) else tscale = 1.0d0 end if call dcft3( f(1), nr1x, nr1x*nr2x, f(1), nr1x, nr1x*nr2x, nr1, nr2, nr3, & isign, tscale, work(1), lwork) #elif defined __COMPLIB IF( isign > 0 ) idir = -1 IF( isign < 0 ) idir = +1 IF( isign /= 0 ) & CALL zfft3d( idir, nr1, nr2, nr3, f(1), nr1x, nr2x, table(1,ip) ) IF( isign > 0 ) THEN tscale = 1.0d0 / DBLE( nr1 * nr2 * nr3 ) call ZDSCAL( nr1x * nr2x * nr3x, tscale, f(1), 1) END IF #elif defined __SCSL IF ( isign /= 0 ) THEN IF ( isign > 0 ) THEN idir = -1 tscale = 1.0D0 / DBLE( nr1 * nr2 * nr3 ) ELSE IF ( isign < 0 ) THEN idir = 1 tscale = 1.0D0 END IF CALL ZZFFT3D ( idir, nr1, nr2, nr3, tscale, f(1), nr1x, nr2x, & f(1), nr1x, nr2x, table(1, ip), work(1), isys ) END IF #endif #if defined __HPM CALL f_hpmstop( 50 + ABS(sgn) ) #endif RETURN END SUBROUTINE ! !=----------------------------------------------------------------------=! ! ! ! ! 3D scalar FFTs, but using sticks! ! ! ! !=----------------------------------------------------------------------=! ! ! ! Copyright (C) 2001-2003 PWSCF group ! This file is distributed under the terms of the ! GNU General Public License. See the file `License' ! in the root directory of the present distribution, ! or http://www.gnu.org/copyleft/gpl.txt . ! !---------------------------------------------------------------------- subroutine cfft3ds (f, nr1, nr2, nr3, nrx1, nrx2, nrx3, sign, do_fft_x, do_fft_y) ! ! driver routine for 3d complex "reduced" fft ! sign > 0 : f(G) => f(R) ; sign < 0 : f(R) => f(G) ! ! The 3D fft are computed only on lines and planes which have ! non zero elements. These lines and planes are defined by ! the two vectors do_fft_x and do_fft_y ! ! The routine is implemented for: ! ! IBM : essl library ! !---------------------------------------------------------------------- ! implicit none integer :: nr1, nr2, nr3, nrx1, nrx2, nrx3, sign ! ! logical dimensions of the fft ! physical dimensions of the f array ! sign of the transformation complex(dbl) :: f ( nrx1 * nrx2 * nrx3 ) integer :: do_fft_x(:), do_fft_y(:) ! ! the fft array ! ! ESSL fft's require a different initialization for sign=-1 and sign=1 ! aux1 contains the initialized quantities ! aux2 is work space ! integer :: m, incx1, incx2 INTEGER :: i, k, j, err, idir, ip, isign, ii, jj REAL(dbl) :: tscale INTEGER, SAVE :: icurrent = 1 INTEGER, SAVE :: dims(3,ndims) = -1 tscale = 1.d0 isign = - sign ! here we follow ESSL convention ! ! ESSL sign convention for fft's is the opposite of the "usual" one ! ! WRITE( stdout, fmt="('DEBUG cfft3ds :',6I6)") nr1, nr2, nr3, nrx1, nrx2, nrx3 ! WRITE( stdout, fmt="('DEBUG cfft3ds :',24I2)") do_fft_x ! WRITE( stdout, fmt="('DEBUG cfft3ds :',24I2)") do_fft_y IF( nr2 /= nrx2 ) & CALL errore(' cfft3ds ', ' wrong dimensions: nr2 /= nrx2 ', 1 ) ip = -1 DO i = 1, ndims ! first check if there is already a table initialized ! for this combination of parameters IF( ( nr1 == dims(1,i) ) .and. ( nr2 == dims(2,i) ) .and. & ( nr3 == dims(3,i) ) ) THEN ip = i EXIT END IF END DO IF( ip == -1 ) THEN ! no table exist for these parameters ! initialize a new one #if defined __FFTW IF( fw_plan( 1, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( fw_plan( 1, icurrent) ) IF( bw_plan( 1, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( bw_plan( 1, icurrent) ) IF( fw_plan( 2, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( fw_plan( 2, icurrent) ) IF( bw_plan( 2, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( bw_plan( 2, icurrent) ) IF( fw_plan( 3, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( fw_plan( 3, icurrent) ) IF( bw_plan( 3, icurrent) /= 0 ) CALL DESTROY_PLAN_1D( bw_plan( 3, icurrent) ) idir = -1; CALL CREATE_PLAN_1D( fw_plan( 1, icurrent), nr1, idir) idir = 1; CALL CREATE_PLAN_1D( bw_plan( 1, icurrent), nr1, idir) idir = -1; CALL CREATE_PLAN_1D( fw_plan( 2, icurrent), nr2, idir) idir = 1; CALL CREATE_PLAN_1D( bw_plan( 2, icurrent), nr2, idir) idir = -1; CALL CREATE_PLAN_1D( fw_plan( 3, icurrent), nr3, idir) idir = 1; CALL CREATE_PLAN_1D( bw_plan( 3, icurrent), nr3, idir) #elif defined __AIX tscale = 1.0d0 ! x - direction incx1 = 1; incx2 = nrx1; m = 1 CALL DCFT ( 1, f(1), incx1, incx2, f(1), incx1, incx2, nr1, m, 1, 1.0d0, & fw_table( 1, 1, icurrent), ltabl, work(1), lwork ) CALL DCFT ( 1, f(1), incx1, incx2, f(1), incx1, incx2, nr1, m, -1, 1.0d0, & bw_table(1, 1, icurrent), ltabl, work(1), lwork ) ! y - direction incx1 = nrx1; incx2 = 1; m = nr1; CALL DCFT ( 1, f(1), incx1, incx2, f(1), incx1, incx2, nr2, m, 1, 1.0d0, & fw_table( 1, 2, icurrent), ltabl, work(1), lwork ) CALL DCFT ( 1, f(1), incx1, incx2, f(1), incx1, incx2, nr2, m, -1, 1.0d0, & bw_table(1, 2, icurrent), ltabl, work(1), lwork ) ! z - direction incx1 = nrx1 * nrx2; incx2 = 1; m = nrx1 * nr2 CALL DCFT ( 1, f(1), incx1, incx2, f(1), incx1, incx2, nr3, m, 1, 1.0d0, & fw_table(1, 3, icurrent), ltabl, work(1), lwork ) CALL DCFT ( 1, f(1), incx1, incx2, f(1), incx1, incx2, nr3, m, -1, 1.0d0, & bw_table(1, 3, icurrent), ltabl, work(1), lwork ) #else CALL errore(' cfft3ds ',' no scalar fft driver specified ', 1) #endif dims(1,icurrent) = nr1; dims(2,icurrent) = nr2; dims(3,icurrent) = nr3 ip = icurrent icurrent = MOD( icurrent, ndims ) + 1 END IF IF ( isign < 0 ) THEN ! ! i - direction ... ! incx1 = 1; incx2 = nrx1; m = 1 do k = 1, nr3 do j = 1, nr2 jj = j + ( k - 1 ) * nrx2 ii = 1 + nrx1 * ( jj - 1 ) if ( do_fft_x( jj ) == 1 ) THEN #if defined __FFTW call FFTW_INPLACE_DRV_1D( bw_plan( 1, ip), m, f( ii ), incx1, incx2 ) #elif defined __AIX call dcft (0, f ( ii ), incx1, incx2, f ( ii ), incx1, incx2, nr1, m, & isign, 1.0d0, bw_table ( 1, 1, ip ), ltabl, work( 1 ), lwork) #else call errore(' cfft3ds ',' no scalar fft driver specified ', 1) #endif endif enddo enddo ! ! ... j-direction ... ! incx1 = nrx1; incx2 = 1; m = nr1 do k = 1, nr3 ii = 1 + nrx1 * nrx2 * ( k - 1 ) if ( do_fft_y( k ) == 1 ) then #if defined __FFTW call FFTW_INPLACE_DRV_1D( bw_plan( 2, ip), m, f( ii ), incx1, incx2 ) #elif defined __AIX call dcft (0, f ( ii ), incx1, incx2, f ( ii ), incx1, incx2, nr2, m, & isign, 1.0d0, bw_table ( 1, 2, ip ), ltabl, work( 1 ), lwork) #else call errore(' cfft3ds ',' no scalar fft driver specified ', 1) #endif endif enddo ! ! ... k-direction ! incx1 = nrx1 * nrx2; incx2 = 1; m = nrx1 * nr2 #if defined __FFTW call FFTW_INPLACE_DRV_1D( bw_plan( 3, ip), m, f( 1 ), incx1, incx2 ) #elif defined __AIX call dcft (0, f( 1 ), incx1, incx2, f( 1 ), incx1, incx2, nr3, m, & isign, 1.0d0, bw_table ( 1, 3, ip ), ltabl, work( 1 ), lwork) #endif ELSE ! ! ... k-direction ! incx1 = nrx1 * nr2; incx2 = 1; m = nrx1 * nr2 #if defined __FFTW call FFTW_INPLACE_DRV_1D( fw_plan( 3, ip), m, f( 1 ), incx1, incx2 ) #elif defined __AIX call dcft (0, f( 1 ), incx1, incx2, f( 1 ), incx1, incx2, nr3, m, & isign, 1.0d0, fw_table ( 1, 3, ip ), ltabl, work( 1 ), lwork) #endif ! ! ... j-direction ... ! incx1 = nrx1; incx2 = 1; m = nr1 do k = 1, nr3 ii = 1 + nrx1 * nrx2 * ( k - 1 ) if ( do_fft_y ( k ) == 1 ) then #if defined __FFTW call FFTW_INPLACE_DRV_1D( fw_plan( 2, ip), m, f( ii ), incx1, incx2 ) #elif defined __AIX call dcft (0, f ( ii ), incx1, incx2, f ( ii ), incx1, incx2, nr2, m, & isign, 1.0d0, fw_table ( 1, 2, ip ), ltabl, work( 1 ), lwork) #else call errore(' cfft3ds ',' no scalar fft driver specified ', 1) #endif endif enddo ! ! i - direction ... ! incx1 = 1; incx2 = nrx1; m = 1 do k = 1, nr3 do j = 1, nr2 jj = j + ( k - 1 ) * nrx2 ii = 1 + nrx1 * ( jj - 1 ) if ( do_fft_x( jj ) == 1 ) then #if defined __FFTW call FFTW_INPLACE_DRV_1D( fw_plan( 1, ip), m, f( ii ), incx1, incx2 ) #elif defined __AIX call dcft (0, f ( ii ), incx1, incx2, f ( ii ), incx1, incx2, nr1, m, & isign, 1.0d0, fw_table ( 1, 1, ip ), ltabl, work( 1 ), lwork) #else call errore(' cfft3ds ',' no scalar fft driver specified ', 1) #endif endif enddo enddo #if defined __AIX || defined __FFTW call DSCAL (2 * nrx1 * nrx2 * nr3, 1.0d0 / (nr1 * nr2 * nr3), f( 1 ), 1) #endif END IF RETURN END SUBROUTINE ! !=----------------------------------------------------------------------=! ! ! ! ! 3D parallel FFT on sub-grids ! ! ! !=----------------------------------------------------------------------=! ! SUBROUTINE cft_b ( f, n1, n2, n3, n1x, n2x, n3x, imin3, imax3, sgn ) ! driver routine for 3d complex fft's on box grid - ibm essl ! fft along xy is done only on planes that correspond to ! dense grid planes on the current processor, i.e. planes ! with imin3 .le. n3 .le. imax3 ! implicit none integer n1,n2,n3,n1x,n2x,n3x,imin3,imax3,sgn complex(kind=8) :: f(:) integer isign, naux, ibid, nplanes, nstart, k real(dbl) :: tscale integer :: ip, i integer, save :: icurrent = 1 integer, save :: dims( 4, ndims ) = -1 #if defined __FFTW C_POINTER, save :: bw_planz( ndims ) = 0 C_POINTER, save :: bw_planxy( ndims ) = 0 #elif defined __AIX real(kind=8), save :: aux3( ltabl, ndims ) real(kind=8), save :: aux2( ltabl, ndims ) real(kind=8), save :: aux1( ltabl, ndims ) #elif defined __COMPLIB real(kind=8), save :: bw_coeffz( ltabl, ndims ) real(kind=8), save :: bw_coeffy( ltabl, ndims ) real(kind=8), save :: bw_coeffx( ltabl, ndims ) #elif defined __SCSL real(kind=8), save :: bw_coeffz( ltabl, ndims ) real(kind=8), save :: bw_coeffy( ltabl, ndims ) real(kind=8), save :: bw_coeffx( ltabl, ndims ) complex(kind=8) :: fy(n2 + n1x * n2), fz(n3 + n1x * n2x * n3) INTEGER :: j #endif isign = -sgn tscale = 1.d0 if ( isign > 0 ) then call errore('cft_b','not implemented',isign) end if ! ! 2d fft on xy planes - only needed planes are transformed ! note that all others are left in an unusable state ! nplanes = imax3 - imin3 + 1 nstart = ( imin3 - 1 ) * n1x * n2x + 1 ! ! Here initialize table only if necessary ! ip = -1 DO i = 1, ndims ! first check if there is already a table initialized ! for this combination of parameters IF ( ( n1 == dims(1,i) ) .and. ( n2 == dims(2,i) ) .and. & ( n3 == dims(3,i) ) .and. ( nplanes == dims(4,i) ) ) THEN ip = i EXIT END IF END DO IF( ip == -1 ) THEN ! no table exist for these parameters ! initialize a new one #if defined __FFTW if ( bw_planz(icurrent) /= 0 ) call DESTROY_PLAN_1D( bw_planz(icurrent) ) call CREATE_PLAN_1D( bw_planz(icurrent), n3, 1 ) if ( bw_planxy(icurrent) /= 0 ) call DESTROY_PLAN_2D( bw_planxy(icurrent) ) call CREATE_PLAN_2D( bw_planxy(icurrent), n1, n2, 1 ) ! #elif defined __AIX if( n3 /= dims(3,icurrent) ) then call dcft( 1, f(1), n1x*n2x, 1, f(1), n1x*n2x, 1, n3, n1x*n2x, isign, & & tscale, aux3(1,icurrent), ltabl, work(1), lwork) end if call dcft( 1, f(1), 1, n1x, f(1), 1, n1x, n1, n2x*nplanes, isign, & & tscale, aux1(1,icurrent), ltabl, work(1), lwork) if( n2 /= dims(2,icurrent) ) then call dcft( 1, f(1), n1x, 1, f(1), n1x, 1, n2, n1x, isign, & & tscale, aux2(1,icurrent), ltabl, work(1), lwork) end if #elif defined __COMPLIB call zfft1di( n3, bw_coeffz( 1, icurrent ) ) call zfft1di( n2, bw_coeffy( 1, icurrent ) ) call zfft1di( n1, bw_coeffx( 1, icurrent ) ) #elif defined __SCSL CALL ZZFFT (0, n3, 0.0D0, DUMMY, 1, bw_coeffz(1, icurrent), & work(1), isys) CALL ZZFFT (0, n2, 0.0D0, DUMMY, 1, bw_coeffy(1, icurrent), & work(1), isys) CALL ZZFFT (0, n1, 0.0D0, DUMMY, 1, bw_coeffx(1, icurrent), & work(1), isys) #else CALL errore(' cft_b ',' no scalar fft driver specified ', 1) #endif dims(1,icurrent) = n1; dims(2,icurrent) = n2 dims(3,icurrent) = n3; dims(4,icurrent) = nplanes ip = icurrent icurrent = MOD( icurrent, ndims ) + 1 END IF #if defined __FFTW call FFTW_INPLACE_DRV_1D( bw_planz(ip), n1x*n2x, f(1), n1x*n2x, 1 ) call FFTW_INPLACE_DRV_2D( bw_planxy(ip), nplanes, f(nstart), 1, n1x*n2x ) #elif defined __AIX ! fft in the z-direction... call dcft( 0, f(1), n1x*n2x, 1, f(1), n1x*n2x, 1, n3, n1x*n2x, isign, & & tscale, aux3(1,ip), ltabl, work(1), lwork) ! x-direction call dcft( 0, f(nstart), 1, n1x, f(nstart), 1, n1x, n1, n2x*nplanes, isign, & & tscale, aux1(1,ip), ltabl, work(1), lwork) ! y-direction DO K = imin3, imax3 nstart = ( k - 1 ) * n1x * n2x + 1 call dcft( 0, f(nstart), n1x, 1, f(nstart), n1x, 1, n2, n1x, isign, & & tscale, aux2(1,ip), ltabl, work(1), lwork) END DO #elif defined __COMPLIB call zfftm1d( 1, n3, n1x*n2x, f(1), n1x*n2x, 1, bw_coeffz(1, ip) ) call zfftm1d( 1, n1, n2x*nplanes, f(nstart), 1, n1x, bw_coeffx(1, ip) ) DO K = imin3, imax3 nstart = ( k - 1 ) * n1x * n2x + 1 call zfftm1d( 1, n2, n1x, f(nstart), n1x, 1, bw_coeffy(1, ip) ) END DO #elif defined __SCSL CALL ZZFFTMR (1, n3, n1x*n2x, tscale, f(1), n1x*n2x, f(1), & n1x*n2x, bw_coeffz(1, ip), work(1), isys) CALL ZZFFTM (1, n1, n2x*nplanes, tscale, f(nstart), n1x, & f(nstart), n1x, bw_coeffx(1, ip), work(1), isys) DO k = imin3, imax3 nstart = ( k - 1 ) * n1x * n2x + 1 CALL ZZFFTMR (1, n2, n1x, tscale, f(nstart), n1x, f(nstart), & n1x, bw_coeffy(1, ip), work(1), isys) END DO #endif RETURN END SUBROUTINE ! !=----------------------------------------------------------------------=! ! ! ! ! FFT support Functions/Subroutines ! ! ! !=----------------------------------------------------------------------=! ! ! integer function good_fft_dimension (n) ! ! Determines the optimal maximum dimensions of fft arrays ! Useful on some machines to avoid memory conflicts ! USE kinds implicit none integer :: n, nx ! this is the default: max dimension = fft dimension nx = n #if defined(__AIX) || defined(DXML) if ( n==8 .or. n==16 .or. n==32 .or. n==64 .or. n==128 .or. n==256) & nx = n + 1 #endif #if defined(CRAYY) || defined(__SX4) if (mod (nr1, 2) ==0) nx = n + 1 #endif good_fft_dimension = nx return end function good_fft_dimension !=----------------------------------------------------------------------=! function allowed (nr) ! find if the fft dimension is a good one ! a "bad one" is either not implemented (as on IBM with ESSL) ! or implemented but with awful performances (most other cases) USE kinds implicit none integer :: nr logical :: allowed integer :: pwr (5) integer :: mr, i, fac, p, maxpwr integer :: factors( 5 ) = (/ 2, 3, 5, 7, 11 /) ! find the factors of the fft dimension mr = nr pwr = 0 factors_loop: do i = 1, 5 fac = factors (i) maxpwr = NINT ( LOG( REAL (mr) ) / LOG( REAL (fac) ) ) + 1 do p = 1, maxpwr if ( mr == 1 ) EXIT factors_loop if ( MOD (mr, fac) == 0 ) then mr = mr / fac pwr (i) = pwr (i) + 1 endif enddo end do factors_loop IF ( nr /= ( mr * 2**pwr (1) * 3**pwr (2) * 5**pwr (3) * 7**pwr (4) * 11**pwr (5) ) ) & CALL errore (' allowed ', ' what ?!? ', 1 ) if ( mr /= 1 ) then ! fft dimension contains factors > 11 : no good in any case allowed = .false. else #ifdef __AIX ! IBM machines with essl libraries allowed = ( pwr(1) >= 1 ) .and. ( pwr(2) <= 2 ) .and. ( pwr(3) <= 1 ) .and. & ( pwr(4) <= 1 ) .and. ( pwr(5) <= 1 ) .and. & ( ( (pwr(2) == 0 ) .and. ( pwr(3) + pwr(4) + pwr(5) ) <= 2 ) .or. & ( (pwr(2) /= 0 ) .and. ( pwr(3) + pwr(4) + pwr(5) ) <= 1 ) ) #else ! fftw and all other cases: no factors 7 and 11 allowed = ( ( pwr(4) == 0 ) .and. ( pwr(5) == 0 ) ) #endif endif return end function allowed !=----------------------------------------------------------------------=! INTEGER FUNCTION good_fft_order( nr, np ) ! ! This function find a "good" fft order value grather or equal to "nr" ! ! nr (input) tentative order n of a fft ! ! np (optional input) if present restrict the search of the order ! in the ensamble of multiples of np ! ! Output: the same if n is a good number ! the closest higher number that is good ! an fft order is not good if not implemented (as on IBM with ESSL) ! or implemented but with awful performances (most other cases) ! IMPLICIT NONE INTEGER, INTENT(IN) :: nr INTEGER, OPTIONAL, INTENT(IN) :: np INTEGER :: new new = nr IF( PRESENT( np ) ) THEN DO WHILE( ( ( .NOT. allowed( new ) ) .OR. ( MOD( new, np ) /= 0 ) ) .AND. ( new <= nfftx ) ) new = new + 1 END DO ELSE DO WHILE( ( .NOT. allowed( new ) ) .AND. ( new <= nfftx ) ) new = new + 1 END DO END IF IF( new > nfftx ) & CALL errore( ' good_fft_order ', ' fft order too large ', new ) good_fft_order = new RETURN END FUNCTION !=----------------------------------------------------------------------=! END MODULE fft_scalar !=----------------------------------------------------------------------=!