From 11f1dced9ef1801c0eb8f8d6d01e5355540f79e7 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Thu, 25 Apr 2024 15:33:41 +0100 Subject: [PATCH 01/19] Extract inner cri calc into seperate subroutine --- src/exx_kernel_default.f90 | 83 +++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 28 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index e9bd0af59..52c140a0d 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -902,7 +902,59 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) return end subroutine get_X_matrix ! - ! + ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. + ! TODO: Change name to something more descriptive + subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, ewald_charge, & + dv, ncaddr, ncbeg, ia_nsup, work_out_3d, work_in_3d, c) + + use exx_poisson, only: exx_v_on_grid, exx_ewald_charge + + use exx_types, only: Phy_k, phi_j, phi_k, ewald_rho, p_gauss, w_gauss, reckernel_3d, ewald_pot, & + pulay_radius, p_ngauss, r_int, p_omega, exx_psolver, exx_pscheme, extent + + use GenBlas, only: dot + + use numbers, only: zero + + implicit none + + real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) + integer, intent(in) :: nsf1, nsf2 ! The indices of the loops from which this function is called + integer, intent(in) :: ncbeg, ia_nsup + real(double), intent(in) :: dv + real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) + real(double), intent(inout) :: c(:) + + integer :: ncaddr, nsf3 + + work_out_3d = zero + ! + work_in_3d = Phy_k(:,:,:,nsf1) * phi_j(:,:,:,nsf2) + ! + if (exx_psolver=='fftw' .and. exx_pscheme=='ewald') then + call exx_ewald_charge(work_in_3d,extent,dv,ewald_charge) + work_in_3d = work_in_3d - ewald_rho*ewald_charge + end if + ! + call exx_v_on_grid(inode,extent,work_in_3d,work_out_3d,r_int, & + exx_psolver,exx_pscheme,pulay_radius,p_omega,p_ngauss,p_gauss,& + w_gauss,reckernel_3d) + + if (exx_psolver=='fftw' .and. exx_pscheme=='ewald') then + work_out_3d = work_out_3d + ewald_pot*ewald_charge + end if + ! + Ome_kj = work_out_3d * phi_k(:,:,:,nsf1) + ! + ncaddr = ncbeg + ia_nsup * (nsf2 - 1) + ! + do nsf3 = 1, ia_nsup + ! + c(ncaddr + nsf3 - 1) = c(ncaddr + nsf3 - 1) & + + dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv + ! + end do ! nsf3 = 1, ia%nsup + end subroutine cri_eri_inner_calculation ! !!****f* exx_kernel_default/m_kern_exx_cri * !! @@ -1186,33 +1238,8 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !$omp do schedule(runtime) collapse(2) do nsf1 = 1, kg%nsup do nsf2 = 1, jb%nsup - work_out_3d = zero - ! - work_in_3d = Phy_k(:,:,:,nsf1) * phi_j(:,:,:,nsf2) - ! - if (exx_psolver=='fftw' .and. exx_pscheme=='ewald') then - call exx_ewald_charge(work_in_3d,extent,dv,ewald_charge) - work_in_3d = work_in_3d - ewald_rho*ewald_charge - end if - ! - call exx_v_on_grid(inode,extent,work_in_3d,work_out_3d,r_int, & - exx_psolver,exx_pscheme,pulay_radius,p_omega,p_ngauss,p_gauss,& - w_gauss,reckernel_3d) - - if (exx_psolver=='fftw' .and. exx_pscheme=='ewald') then - work_out_3d = work_out_3d + ewald_pot*ewald_charge - end if - ! - Ome_kj = work_out_3d * phi_k(:,:,:,nsf1) - ! - ncaddr = ncbeg + ia%nsup * (nsf2 - 1) - ! - do nsf3 = 1, ia%nsup - ! - c(ncaddr + nsf3 - 1) = c(ncaddr + nsf3 - 1) & - + dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv - ! - end do ! nsf3 = 1, ia%nsup + call cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, ewald_charge, dv, ncaddr, & + ncbeg, ia%nsup, work_out_3d, work_in_3d, c) ! end do ! nsf2 = 1, jb%nsup end do ! nsf1 = 1, kg%nsup From 0bf5355c1cb20b7b27ca1cb079c8833618154602 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Fri, 26 Apr 2024 12:55:51 +0100 Subject: [PATCH 02/19] Update count calculation to allow running the loop indices out of order i.e. threaded --- src/exx_kernel_default.f90 | 94 ++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 15 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 52c140a0d..ddd8563de 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -904,8 +904,8 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive - subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, ewald_charge, & - dv, ncaddr, ncbeg, ia_nsup, work_out_3d, work_in_3d, c) + subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, dv & + ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, c) use exx_poisson, only: exx_v_on_grid, exx_ewald_charge @@ -918,14 +918,15 @@ subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, ewald_char implicit none - real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) - integer, intent(in) :: nsf1, nsf2 ! The indices of the loops from which this function is called - integer, intent(in) :: ncbeg, ia_nsup - real(double), intent(in) :: dv - real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) + real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) + integer, intent(in) :: nsf1, nsf2 ! The indices of the loops from which this function is called + integer, intent(in) :: ncbeg, ia_nsup + real(double), intent(in) :: dv + real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) real(double), intent(inout) :: c(:) - integer :: ncaddr, nsf3 + integer :: ncaddr, nsf3 + real(double) :: exx_mat_elem work_out_3d = zero ! @@ -1238,6 +1239,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !$omp do schedule(runtime) collapse(2) do nsf1 = 1, kg%nsup do nsf2 = 1, jb%nsup + ! call cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, ewald_charge, dv, ncaddr, & ncbeg, ia%nsup, work_out_3d, work_in_3d, c) ! @@ -1383,7 +1385,8 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! integer :: maxsuppfuncs integer :: nsf_kg, nsf_ld, nsf_ia, nsf_jb - integer :: r, s, t, count + integer :: r, s, t, + integer :: k_count, l_count, ld_count, kg_count, i_count, j_count, jb_count, count ! ! GTO integer :: i_nx, j_nx, k_nx, l_nx @@ -1403,8 +1406,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & dv = dr**3 maxsuppfuncs = maxval(nsf_species) ! - count = 1 - ! !!$ !!$ ****[ k loop ]**** !!$ @@ -1444,6 +1445,8 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !print*, 'jbnab2ch', jbnab2ch !print* ! + ! The current state of count + k_count = (k - 1) * nbnab(k_in_part) !!$ !!$ ****[ l do loop ]**** !!$ @@ -1466,10 +1469,23 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !yl = ld%xyz_cv(2) !zl = ld%xyz_cv(3) ! + ! The current state of count + ! l_count = (k - 1) * nbnab(k_in_part) * ld%nsup + & + ! (l - 1) * ld%nsup + l_count = k_count + (l - 1) + l_count = l_count * ld%nsup + ! ld_loop: do nsf_ld = 1, ld%nsup ! nbaddr = nbbeg + kg%nsup * (nsf_ld - 1) ! + ! The current state of count + ! count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup + & + ! (l - 1) * ld%nsup * kg%nsup + & + ! (nsf_ld - 1) * kg%nsup + ld_count = l_count + (nsf_ld - 1) + ld_count = ld_count * kg%nsup + ! kg_loop: do nsf_kg = 1, kg%nsup ! if ( backup_eris ) then @@ -1477,6 +1493,15 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & else K_val = b(nbaddr+nsf_kg-1) end if + ! + ! The current state of count + ! kg_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) + & + ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) + & + ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) + & + ! (nsf_kg - 1) * at%n_hnab(k_in_halo) + kg_count = ld_count + (nsf_kg - 1) + kg_count = kg_count * at%n_hnab(k_in_halo) + ! !!$ !!$ ****[ i loop ]**** !!$ @@ -1502,7 +1527,16 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !zi = ia%xyz_ip(3) !print*, size(chalo%i_h2d), shape(chalo%i_h2d) - ! + ! + ! The current state of count + ! i_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & + ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & + ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & + ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & + ! (i - 1) * nbnab(k_in_part) + i_count = kg_count + (i - 1) + i_count = i_count * nbnab(k_in_part) + ! !!$ !!$ ****[ j loop ]**** !!$ @@ -1536,7 +1570,28 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !yj = jb%xyz_cv(2) !zj = jb%xyz_cv(3) ! - jb_loop: do nsf_jb = 1, jb%nsup + ! The current state of count + ! j_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & + ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & + ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & + ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & + ! (i - 1) * nbnab(k_in_part) * jb%nsup + & + ! (j - 1) * jb%nsup + & + j_count = i_count + (j - 1) + j_count = j_count * jb%nsup + ! + jb_loop: do nsf_jb = 1, jb%nsup + ! + ! The current state of count + ! jb_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (i - 1) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (j - 1) * jb%nsup * ia%nsup + & + ! (nsf_jb - 1) * ia%nsup + jb_count = j_count + (nsf_jb - 1) + jb_count = jb_count * ia%nsup ! ncaddr = ncbeg + ia%nsup * (nsf_jb - 1) ! @@ -1560,6 +1615,17 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call stop_timer(tmr_std_exx_poisson,.true.) ia_loop: do nsf_ia = 1, ia%nsup + ! + ! The current state of count + ! count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (i - 1) * nbnab(k_in_part) * jb%nsup * ia%nsup + & + ! (j - 1) * jb%nsup * ia%nsup + & + ! (nsf_jb - 1) * ia%nsup + & + ! nsf_ia + count = jb_count + nsf_ia ! exx_mat_elem = zero ! @@ -1598,8 +1664,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & c(ncaddr + nsf_ia - 1) = c(ncaddr + nsf_ia - 1) + exx_mat_elem ! end if - ! - count = count + 1 ! end do ia_loop ! From 3f2bd850cc62e4a31205c4f69816e88a4cd54888 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Fri, 26 Apr 2024 14:04:07 +0100 Subject: [PATCH 03/19] Refactor m_kern_exx_eri to call cri_eri_inner_calculation --- src/exx_kernel_default.f90 | 142 ++++++++++++------------------------- 1 file changed, 45 insertions(+), 97 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index ddd8563de..6afb11774 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -904,13 +904,14 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive - subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, dv & - ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, c) + subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, nsf3, kpart, dv, ncaddr, ncbeg, & + ia_nsup, backup_eris, start_count, ewald_charge, work_out_3d, work_in_3d, c) use exx_poisson, only: exx_v_on_grid, exx_ewald_charge - use exx_types, only: Phy_k, phi_j, phi_k, ewald_rho, p_gauss, w_gauss, reckernel_3d, ewald_pot, & - pulay_radius, p_ngauss, r_int, p_omega, exx_psolver, exx_pscheme, extent + use exx_types, only: phi_j, phi_k, ewald_rho, p_gauss, w_gauss, reckernel_3d, ewald_pot, & + pulay_radius, p_ngauss, r_int, p_omega, exx_psolver, exx_pscheme, extent, eris, & + store_eris use GenBlas, only: dot @@ -919,9 +920,10 @@ subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, dv & implicit none real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) - integer, intent(in) :: nsf1, nsf2 ! The indices of the loops from which this function is called - integer, intent(in) :: ncbeg, ia_nsup - real(double), intent(in) :: dv + integer, intent(in) :: kpart, nsf1, nsf2 ! The indices of the loops from which this function is called + integer, intent(in) :: ncbeg, ia_nsup, start_count + logical, intent(in) :: backup_eris + real(double), intent(in) :: nsf1_array(:,:,:,:), dv real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) real(double), intent(inout) :: c(:) @@ -930,11 +932,11 @@ subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, dv & work_out_3d = zero ! - work_in_3d = Phy_k(:,:,:,nsf1) * phi_j(:,:,:,nsf2) + work_in_3d = nsf1_array(:,:,:,nsf1) * phi_j(:,:,:,nsf2) ! if (exx_psolver=='fftw' .and. exx_pscheme=='ewald') then call exx_ewald_charge(work_in_3d,extent,dv,ewald_charge) - work_in_3d = work_in_3d - ewald_rho*ewald_charge + work_in_3d = work_in_3d - ewald_rho*ewald_charge end if ! call exx_v_on_grid(inode,extent,work_in_3d,work_out_3d,r_int, & @@ -951,8 +953,17 @@ subroutine cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, dv & ! do nsf3 = 1, ia_nsup ! - c(ncaddr + nsf3 - 1) = c(ncaddr + nsf3 - 1) & - + dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv + exx_mat_elem = dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv + ! + if ( backup_eris ) then + ! + eris(kpart)%store_eris( start_count + nsf3 ) = exx_mat_elem + ! + else + ! + c(ncaddr + nsf3 - 1) = c(ncaddr + nsf3 - 1) + exx_mat_elem + ! + end if ! end do ! nsf3 = 1, ia%nsup end subroutine cri_eri_inner_calculation @@ -1229,19 +1240,19 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! Begin the parallel region here as earlier allocations make it difficult to do before now. ! However, this should be possible in future work. ! - !$omp parallel default(none) reduction(+: c) & - !$omp shared(kg,jb,tmr_std_exx_poisson,tmr_std_exx_nsup,Phy_k,phi_j,phi_k,ncbeg,ia, & - !$omp tmr_std_exx_matmult,ewald_pot,phi_i,exx_psolver,exx_pscheme,extent,dv, & - !$omp ewald_rho,inode,pulay_radius,p_omega,p_gauss,w_gauss,reckernel_3d,r_int) & - !$omp private(nsf1,nsf2,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer,Ome_kj, & + !$omp parallel default(none) reduction(+: c) & + !$omp shared(kg,jb,tmr_std_exx_poisson,tmr_std_exx_nsup,Phy_k,phi_j,phi_k,ncbeg,ia,kpart, & + !$omp tmr_std_exx_matmult,ewald_pot,phi_i,exx_psolver,exx_pscheme,extent,dv, & + !$omp ewald_rho,inode,pulay_radius,p_omega,p_gauss,w_gauss,reckernel_3d,r_int) & + !$omp private(nsf1,nsf2,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer,Ome_kj, & !$omp ncaddr,nsf3,exx_mat_elem,r,s,t) Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer !$omp do schedule(runtime) collapse(2) do nsf1 = 1, kg%nsup do nsf2 = 1, jb%nsup ! - call cri_eri_inner_calculation(phi_i, Ome_kj, nsf1, nsf2, nsf3, ewald_charge, dv, ncaddr, & - ncbeg, ia%nsup, work_out_3d, work_in_3d, c) + call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, ncaddr, ncbeg, & + ia%nsup, .false., 0, ewald_charge, work_out_3d, work_in_3d, c) ! end do ! nsf2 = 1, jb%nsup end do ! nsf1 = 1, kg%nsup @@ -1326,13 +1337,13 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & use exx_types, only: prim_atomic_data, neigh_atomic_data, & tmr_std_exx_accumul, tmr_std_exx_poisson, & tmr_std_exx_poisson, grid_spacing, r_int, extent,& - ewald_charge, ewald_rho, ewald_pot, eris, & + ewald_charge, ewald_rho, ewald_pot, & pulay_radius, p_omega, p_ngauss, p_gauss, w_gauss, & exx_psolver,exx_pscheme, & unit_exx_debug, unit_eri_debug ! - use exx_types, only: phi_i, phi_j, phi_k, phi_l, eris, & - work_in_3d, work_out_3d, exx_gto, exx_gto_poisson + use exx_types, only: phi_i_1d_buffer, phi_j, phi_k, phi_l, & + Ome_kj_1d_buffer, work_in_3d, work_out_3d use exx_types, only: exx_alloc ! use exx_memory, only: exx_mem_alloc @@ -1378,6 +1389,9 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & real(double) :: dr,dv,K_val real(double) :: exx_mat_elem ! + ! We allocate pointers here to point at 1D arrays later and allow contiguous access when passing to BLAS dot later + real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:) + ! type(prim_atomic_data) :: ia !i_alpha type(neigh_atomic_data) :: jb !j_beta type(neigh_atomic_data) :: kg !k_gamma @@ -1385,7 +1399,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! integer :: maxsuppfuncs integer :: nsf_kg, nsf_ld, nsf_ia, nsf_jb - integer :: r, s, t, + integer :: r, s, t integer :: k_count, l_count, ld_count, kg_count, i_count, j_count, jb_count, count ! ! GTO @@ -1517,7 +1531,8 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! !print*, 'i',i, 'global_num',ia%ip,'spe',ia%spec, 'nsup', ia%nsup ! - if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i','alloc') + if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i_1d_buffer','alloc') + phi_i(1:2*extent+1, 1:2*extent+1, 1:2*extent+1, 1:ia%nsup) => phi_i_1d_buffer ! call exx_phi_on_grid(inode,ia%ip,ia%spec,extent, & ia%xyz,ia%nsup,phi_i,r_int,xyz_zero) @@ -1566,6 +1581,8 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call exx_phi_on_grid(inode,jb%global_num,jb%spec,extent, & jb%xyz,jb%nsup,phi_j,r_int,xyz_zero) ! + if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','alloc') + ! !xj = jb%xyz_cv(1) !yj = jb%xyz_cv(2) !zj = jb%xyz_cv(3) @@ -1580,6 +1597,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & j_count = i_count + (j - 1) j_count = j_count * jb%nsup ! + Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer jb_loop: do nsf_jb = 1, jb%nsup ! ! The current state of count @@ -1593,79 +1611,8 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & jb_count = j_count + (nsf_jb - 1) jb_count = jb_count * ia%nsup ! - ncaddr = ncbeg + ia%nsup * (nsf_jb - 1) - ! - call start_timer(tmr_std_exx_poisson) - work_out_3d = zero - work_in_3d = phi_l(:,:,:,nsf_ld)*phi_j(:,:,:,nsf_jb) - ! - if (exx_psolver=='fftw' .and. exx_pscheme=='ewald') then - call exx_ewald_charge(work_in_3d,extent,dv,ewald_charge) - work_in_3d = work_in_3d - ewald_rho*ewald_charge - end if - ! - call exx_v_on_grid(inode,extent,work_in_3d,work_out_3d,r_int, & - exx_psolver,exx_pscheme,pulay_radius,p_omega,p_ngauss,p_gauss,& - w_gauss,reckernel_3d) - ! - if (exx_psolver=='fftw' .and. exx_pscheme=='ewald') then - work_out_3d = work_out_3d + ewald_pot*ewald_charge - end if - ! - call stop_timer(tmr_std_exx_poisson,.true.) - - ia_loop: do nsf_ia = 1, ia%nsup - ! - ! The current state of count - ! count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (i - 1) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (j - 1) * jb%nsup * ia%nsup + & - ! (nsf_jb - 1) * ia%nsup + & - ! nsf_ia - count = jb_count + nsf_ia - ! - exx_mat_elem = zero - ! - call start_timer(tmr_std_exx_accumul) - ! - do r = 1, 2*extent+1 - do s = 1, 2*extent+1 - do t = 1, 2*extent+1 - - exx_mat_elem = exx_mat_elem & - + phi_k(t,s,r,nsf_kg) * phi_i(t,s,r,nsf_ia) * K_val & - * work_out_3d(t,s,r) * dv - - end do - end do - end do - ! - call stop_timer(tmr_std_exx_accumul,.true.) - ! - if ( exx_debug ) then - - write(unit_eri_debug,10) count, exx_mat_elem, K_val, & - '[',ia%ip, kg%global_num,'|',ld%global_num, jb%global_num,']', & - '(',nsf_ia,nsf_kg, '|',nsf_ld,nsf_jb, ')' , & - '[',ia%name,kg%name,'|',ld%name,jb%name,']' , & - ia%xyz_ip(3), kg%xyz_cv(3), ld%xyz_cv(3), jb%xyz_cv(3) - - end if - ! - if ( backup_eris ) then - ! - eris(kpart)%store_eris( count ) = exx_mat_elem - ! - else - ! - c(ncaddr + nsf_ia - 1) = c(ncaddr + nsf_ia - 1) + exx_mat_elem - ! - end if - ! - end do ia_loop + call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_kg, nsf_jb, kpart, dv, ncaddr, ncbeg, & + ia%nsup, backup_eris, jb_count, ewald_charge, work_out_3d, work_in_3d, c) ! end do jb_loop ! @@ -1673,6 +1620,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! end if ! + if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','dealloc') if ( exx_alloc ) call exx_mem_alloc(extent,jb%nsup,0,'phi_j','dealloc') ! !!$ @@ -1681,7 +1629,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! end do j_loop ! - if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i','dealloc') + if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i_1d_buffer','dealloc') ! !!$ !!$ ****[ i end loop ]**** From 40b66c59575ee5f7b1e9736d95df5949f4c918be Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Fri, 26 Apr 2024 17:12:08 +0100 Subject: [PATCH 04/19] Passing in correct nsf index --- src/exx_kernel_default.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 6afb11774..bb5219f0f 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -904,7 +904,7 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive - subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, nsf3, kpart, dv, ncaddr, ncbeg, & + subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, ncaddr, ncbeg, & ia_nsup, backup_eris, start_count, ewald_charge, work_out_3d, work_in_3d, c) use exx_poisson, only: exx_v_on_grid, exx_ewald_charge @@ -1611,7 +1611,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & jb_count = j_count + (nsf_jb - 1) jb_count = jb_count * ia%nsup ! - call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_kg, nsf_jb, kpart, dv, ncaddr, ncbeg, & + call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv, ncaddr, ncbeg, & ia%nsup, backup_eris, jb_count, ewald_charge, work_out_3d, work_in_3d, c) ! end do jb_loop From c447caf729244431f4e723bea75811a0fbd909bf Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 29 Apr 2024 11:05:09 +0100 Subject: [PATCH 05/19] Account for looping from 1 to length i.e. missing one iteration --- src/exx_kernel_default.f90 | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index bb5219f0f..ce33edbbf 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -1460,7 +1460,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !print* ! ! The current state of count - k_count = (k - 1) * nbnab(k_in_part) + k_count = (k - 1) * (nbnab(k_in_part) - 1) !!$ !!$ ****[ l do loop ]**** !!$ @@ -1485,9 +1485,9 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! ! The current state of count ! l_count = (k - 1) * nbnab(k_in_part) * ld%nsup + & - ! (l - 1) * ld%nsup + ! (l - 1) * ld%nsup l_count = k_count + (l - 1) - l_count = l_count * ld%nsup + l_count = l_count * (ld%nsup - 1) ! ld_loop: do nsf_ld = 1, ld%nsup ! @@ -1498,7 +1498,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! (l - 1) * ld%nsup * kg%nsup + & ! (nsf_ld - 1) * kg%nsup ld_count = l_count + (nsf_ld - 1) - ld_count = ld_count * kg%nsup + ld_count = ld_count * (kg%nsup - 1) ! kg_loop: do nsf_kg = 1, kg%nsup ! @@ -1510,11 +1510,11 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! ! The current state of count ! kg_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) + & - ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) + & - ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) + & - ! (nsf_kg - 1) * at%n_hnab(k_in_halo) + ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) + & + ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) + & + ! (nsf_kg - 1) * at%n_hnab(k_in_halo) kg_count = ld_count + (nsf_kg - 1) - kg_count = kg_count * at%n_hnab(k_in_halo) + kg_count = kg_count * (at%n_hnab(k_in_halo) - 1) ! !!$ !!$ ****[ i loop ]**** @@ -1545,12 +1545,12 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! ! The current state of count ! i_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (i - 1) * nbnab(k_in_part) + ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & + ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & + ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & + ! (i - 1) * nbnab(k_in_part) i_count = kg_count + (i - 1) - i_count = i_count * nbnab(k_in_part) + i_count = i_count * (nbnab(k_in_part) - 1) ! !!$ !!$ ****[ j loop ]**** @@ -1595,7 +1595,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! (i - 1) * nbnab(k_in_part) * jb%nsup + & ! (j - 1) * jb%nsup + & j_count = i_count + (j - 1) - j_count = j_count * jb%nsup + j_count = j_count * (jb%nsup - 1) ! Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer jb_loop: do nsf_jb = 1, jb%nsup @@ -1609,7 +1609,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! (j - 1) * jb%nsup * ia%nsup + & ! (nsf_jb - 1) * ia%nsup jb_count = j_count + (nsf_jb - 1) - jb_count = jb_count * ia%nsup + jb_count = jb_count * (ia%nsup - 1) ! call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv, ncaddr, ncbeg, & ia%nsup, backup_eris, jb_count, ewald_charge, work_out_3d, work_in_3d, c) From 1237b73b5049fc6ae6b244969f7f5cb35c837857 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 29 Apr 2024 13:25:44 +0100 Subject: [PATCH 06/19] Use temporary array to store backup values inside threaded regions --- src/exx_kernel_default.f90 | 82 ++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index ce33edbbf..267645534 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -904,8 +904,9 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive - subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, ncaddr, ncbeg, & - ia_nsup, backup_eris, start_count, ewald_charge, work_out_3d, work_in_3d, c) + subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, & + ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, c, & + backup_eris, store_eris_inner) use exx_poisson, only: exx_v_on_grid, exx_ewald_charge @@ -919,13 +920,17 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar implicit none - real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) - integer, intent(in) :: kpart, nsf1, nsf2 ! The indices of the loops from which this function is called - integer, intent(in) :: ncbeg, ia_nsup, start_count - logical, intent(in) :: backup_eris - real(double), intent(in) :: nsf1_array(:,:,:,:), dv - real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) - real(double), intent(inout) :: c(:) + integer :: maxsuppfuncs = maxval(nsf_species) + + real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) + integer, intent(in) :: kpart, nsf1, nsf2 ! The indices of the loops from which this function is called + integer, intent(in) :: ncbeg, ia_nsup + real(double), intent(in) :: nsf1_array(:,:,:,:), dv + real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) + real(double), intent(inout) :: c(:) + ! Backup eris parameters. Optional as they are only needed by eri function + logical, intent(in), :: backup_eris + real(double), intent(out), OPTIONAL :: store_eris_inner(maxsuppfuncs, maxsuppfuncs) integer :: ncaddr, nsf3 real(double) :: exx_mat_elem @@ -957,7 +962,8 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar ! if ( backup_eris ) then ! - eris(kpart)%store_eris( start_count + nsf3 ) = exx_mat_elem + ! eris(kpart)%store_eris( count ) = exx_mat_elem + store_eris_inner(nsf2, nsf3) = exx_mat_elem ! else ! @@ -1070,8 +1076,9 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer :: np, ni ! real(double), dimension(3) :: xyz_zero = zero - real(double) :: dr,dv,K_val - real(double) :: exx_mat_elem + real(double) :: dr,dv,K_val + real(double) :: exx_mat_elem + ! ! ! We allocate pointers here to point at 1D arrays later and allow contiguous access when passing to BLAS dot later real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:) @@ -1081,8 +1088,8 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & type(neigh_atomic_data) :: kg !k_gamma type(neigh_atomic_data) :: ld !l_delta ! - integer :: maxsuppfuncs, nsf1, nsf2, nsf3 - integer :: r, s, t + integer :: maxsuppfuncs, nsf_kg, nsf_ld + integer :: r, s, t, count ! ! dr = grid_spacing @@ -1093,6 +1100,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !range_kl = 0.5d0 !unit_exx_debug1 = 333 ! + count = 1 ! !!$ !!$ ****[ k loop ]**** @@ -1158,16 +1166,16 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call exx_phi_on_grid(inode,ld%global_num,ld%spec,extent, & ld%xyz,ld%nsup,phi_l,r_int,xyz_zero) ! - do nsf2 = 1, ld%nsup + do nsf_ld = 1, ld%nsup ! - nbaddr = nbbeg + kg%nsup * (nsf2 - 1) + nbaddr = nbbeg + kg%nsup * (nsf_ld - 1) ! - do nsf1 = 1, kg%nsup + do nsf_kg = 1, kg%nsup ! - K_val = b(nbaddr+nsf1-1) + K_val = b(nbaddr+nsf_kg-1) ! call start_timer(tmr_std_exx_accumul) - Phy_k(:,:,:,nsf1) = Phy_k(:,:,:,nsf1) + K_val*phi_l(:,:,:,nsf2) + Phy_k(:,:,:,nsf_kg) = Phy_k(:,:,:,nsf_kg) + K_val*phi_l(:,:,:,nsf_ld) call stop_timer(tmr_std_exx_accumul,.true.) end do @@ -1244,18 +1252,19 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !$omp shared(kg,jb,tmr_std_exx_poisson,tmr_std_exx_nsup,Phy_k,phi_j,phi_k,ncbeg,ia,kpart, & !$omp tmr_std_exx_matmult,ewald_pot,phi_i,exx_psolver,exx_pscheme,extent,dv, & !$omp ewald_rho,inode,pulay_radius,p_omega,p_gauss,w_gauss,reckernel_3d,r_int) & - !$omp private(nsf1,nsf2,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer,Ome_kj, & - !$omp ncaddr,nsf3,exx_mat_elem,r,s,t) + !$omp private(nsf_kg,nsf_ld,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer,Ome_kj, & + !$omp ncaddr,exx_mat_elem,r,s,t) Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer !$omp do schedule(runtime) collapse(2) - do nsf1 = 1, kg%nsup - do nsf2 = 1, jb%nsup + do nsf_kg = 1, kg%nsup + do nsf_ld = 1, jb%nsup ! - call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, ncaddr, ncbeg, & - ia%nsup, .false., 0, ewald_charge, work_out_3d, work_in_3d, c) + call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_ld, kpart, dv, & + ncaddr, &ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & + .false.) ! - end do ! nsf2 = 1, jb%nsup - end do ! nsf1 = 1, kg%nsup + end do ! nsf_ld = 1, jb%nsup + end do ! nsf_kg = 1, kg%nsup !$omp end do !$omp end parallel ! @@ -1397,11 +1406,13 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & type(neigh_atomic_data) :: kg !k_gamma type(neigh_atomic_data) :: ld !l_delta ! - integer :: maxsuppfuncs + integer :: maxsuppfuncs = maxval(nsf_species) integer :: nsf_kg, nsf_ld, nsf_ia, nsf_jb integer :: r, s, t integer :: k_count, l_count, ld_count, kg_count, i_count, j_count, jb_count, count ! + real(double), dimension(maxsuppfuncs, maxsuppfuncs) :: store_eris_inner + ! ! GTO integer :: i_nx, j_nx, k_nx, l_nx integer :: i_ny, j_ny, k_ny, l_ny @@ -1418,7 +1429,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! dr = grid_spacing dv = dr**3 - maxsuppfuncs = maxval(nsf_species) + count = 1 ! !!$ !!$ ****[ k loop ]**** @@ -1611,10 +1622,19 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & jb_count = j_count + (nsf_jb - 1) jb_count = jb_count * (ia%nsup - 1) ! - call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv, ncaddr, ncbeg, & - ia%nsup, backup_eris, jb_count, ewald_charge, work_out_3d, work_in_3d, c) + call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv, & + ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & + backup_eris, store_eris_inner) ! end do jb_loop + + ! It is not ideal to repeat these loops here but it is currently necessary to keep the backup thread safe + do nsf_jb = 1, jb%nsup + do nsf_ia = 1, ia%nsup + eris(kpart)%store_eris( count ) = store_eris_inner(nsf_jb, nsf_ia) + count = count + 1 + end do + end do ! nsf_ld = 1, jb%nsup ! end if ! From 596b58cbcde44f9700e164e0b288c2d290e9ee29 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 29 Apr 2024 14:12:31 +0100 Subject: [PATCH 07/19] Fix compiler errors --- src/exx_kernel_default.f90 | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 267645534..04d5f4336 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -920,8 +920,6 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar implicit none - integer :: maxsuppfuncs = maxval(nsf_species) - real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) integer, intent(in) :: kpart, nsf1, nsf2 ! The indices of the loops from which this function is called integer, intent(in) :: ncbeg, ia_nsup @@ -929,8 +927,8 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) real(double), intent(inout) :: c(:) ! Backup eris parameters. Optional as they are only needed by eri function - logical, intent(in), :: backup_eris - real(double), intent(out), OPTIONAL :: store_eris_inner(maxsuppfuncs, maxsuppfuncs) + logical, intent(in) :: backup_eris + real(double), intent(out), OPTIONAL :: store_eris_inner(:,:) integer :: ncaddr, nsf3 real(double) :: exx_mat_elem @@ -1260,7 +1258,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & do nsf_ld = 1, jb%nsup ! call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_ld, kpart, dv, & - ncaddr, &ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & + ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & .false.) ! end do ! nsf_ld = 1, jb%nsup @@ -1352,7 +1350,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & unit_exx_debug, unit_eri_debug ! use exx_types, only: phi_i_1d_buffer, phi_j, phi_k, phi_l, & - Ome_kj_1d_buffer, work_in_3d, work_out_3d + Ome_kj_1d_buffer, work_in_3d, work_out_3d, eris use exx_types, only: exx_alloc ! use exx_memory, only: exx_mem_alloc @@ -1406,12 +1404,12 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & type(neigh_atomic_data) :: kg !k_gamma type(neigh_atomic_data) :: ld !l_delta ! - integer :: maxsuppfuncs = maxval(nsf_species) + integer :: maxsuppfuncs integer :: nsf_kg, nsf_ld, nsf_ia, nsf_jb - integer :: r, s, t + integer :: r, s, t, stat integer :: k_count, l_count, ld_count, kg_count, i_count, j_count, jb_count, count ! - real(double), dimension(maxsuppfuncs, maxsuppfuncs) :: store_eris_inner + real(double), allocatable, dimension(:,:) :: store_eris_inner ! ! GTO integer :: i_nx, j_nx, k_nx, l_nx @@ -1427,6 +1425,10 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & real(double) :: eri_gto, eri_pao, test ! + maxsuppfuncs = MAXVAL(nsf_species) + allocate(store_eris_inner(maxsuppfuncs, maxsuppfuncs), STAT = stat) + if(stat /= 0) call cq_abort('m_kern_exx_eri: error allocating store_eris_inner!') + ! dr = grid_spacing dv = dr**3 count = 1 @@ -1436,6 +1438,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !!$ k_loop: do k = 1, ahalo%nh_part(kpart) ! + write (*,*) kpart, ahalo%nh_part(kpart), k k_in_halo = ahalo%j_beg(kpart) + k - 1 k_in_part = ahalo%j_seq(k_in_halo) nbkbeg = ibaddr (k_in_part) From a6ed706107049a09fc477b48a58ea622c90a529d Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 29 Apr 2024 14:49:19 +0100 Subject: [PATCH 08/19] Use pointer array instead of directly counting iterations --- src/exx_kernel_default.f90 | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 04d5f4336..a7e1f75c5 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -928,7 +928,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar real(double), intent(inout) :: c(:) ! Backup eris parameters. Optional as they are only needed by eri function logical, intent(in) :: backup_eris - real(double), intent(out), OPTIONAL :: store_eris_inner(:,:) + real(double), pointer, intent(out), OPTIONAL :: store_eris_inner(:,:) integer :: ncaddr, nsf3 real(double) :: exx_mat_elem @@ -956,6 +956,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar ! do nsf3 = 1, ia_nsup ! + ! Can we instead always store directly into store_eris_inner(nsf2, nsf3)? exx_mat_elem = dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv ! if ( backup_eris ) then @@ -1335,8 +1336,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! use gto_format_new, only: gto ! - use species_module, only: nsf_species - ! use exx_evalpao, only: exx_phi_on_grid ! use exx_evalgto, only: exx_gto_on_grid_prim @@ -1397,20 +1396,17 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & real(double) :: exx_mat_elem ! ! We allocate pointers here to point at 1D arrays later and allow contiguous access when passing to BLAS dot later - real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:) + real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:), store_eris_inner(:,:) ! type(prim_atomic_data) :: ia !i_alpha type(neigh_atomic_data) :: jb !j_beta type(neigh_atomic_data) :: kg !k_gamma type(neigh_atomic_data) :: ld !l_delta ! - integer :: maxsuppfuncs integer :: nsf_kg, nsf_ld, nsf_ia, nsf_jb integer :: r, s, t, stat integer :: k_count, l_count, ld_count, kg_count, i_count, j_count, jb_count, count ! - real(double), allocatable, dimension(:,:) :: store_eris_inner - ! ! GTO integer :: i_nx, j_nx, k_nx, l_nx integer :: i_ny, j_ny, k_ny, l_ny @@ -1425,10 +1421,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & real(double) :: eri_gto, eri_pao, test ! - maxsuppfuncs = MAXVAL(nsf_species) - allocate(store_eris_inner(maxsuppfuncs, maxsuppfuncs), STAT = stat) - if(stat /= 0) call cq_abort('m_kern_exx_eri: error allocating store_eris_inner!') - ! dr = grid_spacing dv = dr**3 count = 1 @@ -1611,7 +1603,13 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & j_count = i_count + (j - 1) j_count = j_count * (jb%nsup - 1) ! + ! TODO include bounds in Ome_kj_1d_buffer and store_eris Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer + ! + ! Point at the next block of eris to store and update counter + store_eris_inner(1:jb%nsup, 1:ia%nsup) => eris(kpart)%store_eris(count) + count = count + (jb%nsup * ia%nsup) + ! jb_loop: do nsf_jb = 1, jb%nsup ! ! The current state of count @@ -1630,14 +1628,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & backup_eris, store_eris_inner) ! end do jb_loop - - ! It is not ideal to repeat these loops here but it is currently necessary to keep the backup thread safe - do nsf_jb = 1, jb%nsup - do nsf_ia = 1, ia%nsup - eris(kpart)%store_eris( count ) = store_eris_inner(nsf_jb, nsf_ia) - count = count + 1 - end do - end do ! nsf_ld = 1, jb%nsup ! end if ! From f1a2c745ed90748831fde728d3e7ae8bd9de3e57 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 29 Apr 2024 15:50:02 +0100 Subject: [PATCH 09/19] Change pointer param to inout and type to target --- src/exx_kernel_default.f90 | 4 ++-- src/exx_types.f90 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index a7e1f75c5..bb10ae4e6 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -928,7 +928,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar real(double), intent(inout) :: c(:) ! Backup eris parameters. Optional as they are only needed by eri function logical, intent(in) :: backup_eris - real(double), pointer, intent(out), OPTIONAL :: store_eris_inner(:,:) + real(double), pointer, intent(inout), OPTIONAL :: store_eris_inner(:,:) integer :: ncaddr, nsf3 real(double) :: exx_mat_elem @@ -1607,7 +1607,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer ! ! Point at the next block of eris to store and update counter - store_eris_inner(1:jb%nsup, 1:ia%nsup) => eris(kpart)%store_eris(count) + store_eris_inner(1:jb%nsup, 1:ia%nsup) => eris(kpart)%store_eris(count:count + (jb%nsup * ia%nsup)) count = count + (jb%nsup * ia%nsup) ! jb_loop: do nsf_jb = 1, jb%nsup diff --git a/src/exx_types.f90 b/src/exx_types.f90 index a842ece92..4a37f6cb0 100644 --- a/src/exx_types.f90 +++ b/src/exx_types.f90 @@ -181,7 +181,7 @@ module exx_types end type store_eris ! Electron repulsion integrals - type(store_eris), dimension(:), allocatable :: eris + type(store_eris), dimension(:), allocatable, target :: eris type prim_atomic_data integer :: pr From aa983672b2b9224f1381bb387e8298c4b58e454d Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 29 Apr 2024 16:02:15 +0100 Subject: [PATCH 10/19] Add one to count to prevent repeating store indices --- src/exx_kernel_default.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index bb10ae4e6..40f3ee016 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -1608,7 +1608,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! ! Point at the next block of eris to store and update counter store_eris_inner(1:jb%nsup, 1:ia%nsup) => eris(kpart)%store_eris(count:count + (jb%nsup * ia%nsup)) - count = count + (jb%nsup * ia%nsup) + count = count + (jb%nsup * ia%nsup) + 1 ! jb_loop: do nsf_jb = 1, jb%nsup ! From 19bee0d715ec7649c2dd40ca9da4fbf5918ca7fe Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 29 Apr 2024 16:16:29 +0100 Subject: [PATCH 11/19] Swap store_eris_inner dimensions to match loops --- src/exx_kernel_default.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 40f3ee016..8286da1b9 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -1607,7 +1607,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer ! ! Point at the next block of eris to store and update counter - store_eris_inner(1:jb%nsup, 1:ia%nsup) => eris(kpart)%store_eris(count:count + (jb%nsup * ia%nsup)) + store_eris_inner(1:ia%nsup, 1:jb%nsup) => eris(kpart)%store_eris(count:count + (jb%nsup * ia%nsup)) count = count + (jb%nsup * ia%nsup) + 1 ! jb_loop: do nsf_jb = 1, jb%nsup From f9b048f8058d07eff1dc419064d7296dccd7868c Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Tue, 30 Apr 2024 17:23:38 +0100 Subject: [PATCH 12/19] Pass correct multiplier into shared cri and eri subroutine --- src/exx_kernel_default.f90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 8286da1b9..ccb3d5a56 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -904,7 +904,7 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive - subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, & + subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, multiplier, & ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, c, & backup_eris, store_eris_inner) @@ -923,7 +923,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) integer, intent(in) :: kpart, nsf1, nsf2 ! The indices of the loops from which this function is called integer, intent(in) :: ncbeg, ia_nsup - real(double), intent(in) :: nsf1_array(:,:,:,:), dv + real(double), intent(in) :: nsf1_array(:,:,:,:), multiplier real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) real(double), intent(inout) :: c(:) ! Backup eris parameters. Optional as they are only needed by eri function @@ -957,7 +957,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar do nsf3 = 1, ia_nsup ! ! Can we instead always store directly into store_eris_inner(nsf2, nsf3)? - exx_mat_elem = dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv + exx_mat_elem = dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * multiplier ! if ( backup_eris ) then ! @@ -1623,7 +1623,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & jb_count = j_count + (nsf_jb - 1) jb_count = jb_count * (ia%nsup - 1) ! - call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv, & + call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv * K_val, & ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & backup_eris, store_eris_inner) ! From 7accbd9e1a546690179530bc9876b78b75893e21 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Tue, 30 Apr 2024 17:27:44 +0100 Subject: [PATCH 13/19] Add dv back in to signature --- src/exx_kernel_default.f90 | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index ccb3d5a56..0c0a3b215 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -904,9 +904,9 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive - subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, multiplier, & - ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, c, & - backup_eris, store_eris_inner) + subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, & + multiplier, ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, & + c, backup_eris, store_eris_inner) use exx_poisson, only: exx_v_on_grid, exx_ewald_charge @@ -923,7 +923,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) integer, intent(in) :: kpart, nsf1, nsf2 ! The indices of the loops from which this function is called integer, intent(in) :: ncbeg, ia_nsup - real(double), intent(in) :: nsf1_array(:,:,:,:), multiplier + real(double), intent(in) :: nsf1_array(:,:,:,:), dv, multiplier real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) real(double), intent(inout) :: c(:) ! Backup eris parameters. Optional as they are only needed by eri function @@ -957,7 +957,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar do nsf3 = 1, ia_nsup ! ! Can we instead always store directly into store_eris_inner(nsf2, nsf3)? - exx_mat_elem = dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * multiplier + exx_mat_elem = dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv * multiplier ! if ( backup_eris ) then ! @@ -1258,8 +1258,8 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & do nsf_kg = 1, kg%nsup do nsf_ld = 1, jb%nsup ! - call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_ld, kpart, dv, & - ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & + call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_ld, kpart, dv, 1.0d0, & + ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & .false.) ! end do ! nsf_ld = 1, jb%nsup @@ -1623,7 +1623,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & jb_count = j_count + (nsf_jb - 1) jb_count = jb_count * (ia%nsup - 1) ! - call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv * K_val, & + call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv, K_val, & ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & backup_eris, store_eris_inner) ! From 71f6a79bc474841eb483e19c05adfb5e3c1ba2a9 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Wed, 1 May 2024 15:39:52 +0100 Subject: [PATCH 14/19] Fix incrementing counter --- src/exx_kernel_default.f90 | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 0c0a3b215..d7789906a 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -641,6 +641,7 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) if(stat/=0) call cq_abort('Error allocating memory toeris/exx !',stat) call reg_alloc_mem(area_exx, nb_eris, type_int,'eris',unit_memory_write) eris(kpart)%filter_eris = .true. + ! ! Second dummy call for poor-man filtering of ERIs ! @@ -929,7 +930,6 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar ! Backup eris parameters. Optional as they are only needed by eri function logical, intent(in) :: backup_eris real(double), pointer, intent(inout), OPTIONAL :: store_eris_inner(:,:) - integer :: ncaddr, nsf3 real(double) :: exx_mat_elem @@ -962,7 +962,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar if ( backup_eris ) then ! ! eris(kpart)%store_eris( count ) = exx_mat_elem - store_eris_inner(nsf2, nsf3) = exx_mat_elem + store_eris_inner(nsf3, nsf2) = exx_mat_elem ! else ! @@ -1423,14 +1423,13 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! dr = grid_spacing dv = dr**3 - count = 1 + count = 0 ! !!$ !!$ ****[ k loop ]**** !!$ k_loop: do k = 1, ahalo%nh_part(kpart) ! - write (*,*) kpart, ahalo%nh_part(kpart), k k_in_halo = ahalo%j_beg(kpart) + k - 1 k_in_part = ahalo%j_seq(k_in_halo) nbkbeg = ibaddr (k_in_part) @@ -1607,8 +1606,8 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer ! ! Point at the next block of eris to store and update counter - store_eris_inner(1:ia%nsup, 1:jb%nsup) => eris(kpart)%store_eris(count:count + (jb%nsup * ia%nsup)) - count = count + (jb%nsup * ia%nsup) + 1 + store_eris_inner(1:ia%nsup, 1:jb%nsup) => eris(kpart)%store_eris(count+1:count + (jb%nsup * ia%nsup)) + count = count + (jb%nsup * ia%nsup) ! jb_loop: do nsf_jb = 1, jb%nsup ! From 546d61c7b3a3993c46f469d1a108a85578de7f8c Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Wed, 15 May 2024 15:00:31 +0100 Subject: [PATCH 15/19] Tidy up m_kern_exx_eri and correct phi_k index in cri_eri_inner_calculation --- src/exx_kernel_default.f90 | 114 +++---------------------------------- 1 file changed, 8 insertions(+), 106 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index d7789906a..c71c4069e 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -905,15 +905,14 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive - subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpart, dv, & + subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, nsf_kg, dv, & multiplier, ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, & c, backup_eris, store_eris_inner) use exx_poisson, only: exx_v_on_grid, exx_ewald_charge use exx_types, only: phi_j, phi_k, ewald_rho, p_gauss, w_gauss, reckernel_3d, ewald_pot, & - pulay_radius, p_ngauss, r_int, p_omega, exx_psolver, exx_pscheme, extent, eris, & - store_eris + pulay_radius, p_ngauss, r_int, p_omega, exx_psolver, exx_pscheme, extent, store_eris use GenBlas, only: dot @@ -922,7 +921,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar implicit none real(double), pointer, intent(in) :: Ome_kj(:,:,:), phi_i(:,:,:,:) - integer, intent(in) :: kpart, nsf1, nsf2 ! The indices of the loops from which this function is called + integer, intent(in) :: nsf1, nsf2, nsf_kg ! The indices of the loops from which this function is called integer, intent(in) :: ncbeg, ia_nsup real(double), intent(in) :: nsf1_array(:,:,:,:), dv, multiplier real(double), intent(out) :: ewald_charge, work_out_3d(:,:,:), work_in_3d(:,:,:) @@ -950,7 +949,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, kpar work_out_3d = work_out_3d + ewald_pot*ewald_charge end if ! - Ome_kj = work_out_3d * phi_k(:,:,:,nsf1) + Ome_kj = work_out_3d * phi_k(:,:,:,nsf_kg) ! ncaddr = ncbeg + ia_nsup * (nsf2 - 1) ! @@ -1258,7 +1257,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & do nsf_kg = 1, kg%nsup do nsf_ld = 1, jb%nsup ! - call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_ld, kpart, dv, 1.0d0, & + call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_ld, nsf_kg, dv, 1.0d0, & ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & .false.) ! @@ -1393,7 +1392,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & real(double), dimension(3) :: xyz_zero = zero ! real(double) :: dr,dv,K_val - real(double) :: exx_mat_elem ! ! We allocate pointers here to point at 1D arrays later and allow contiguous access when passing to BLAS dot later real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:), store_eris_inner(:,:) @@ -1403,23 +1401,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & type(neigh_atomic_data) :: kg !k_gamma type(neigh_atomic_data) :: ld !l_delta ! - integer :: nsf_kg, nsf_ld, nsf_ia, nsf_jb - integer :: r, s, t, stat - integer :: k_count, l_count, ld_count, kg_count, i_count, j_count, jb_count, count - ! - ! GTO - integer :: i_nx, j_nx, k_nx, l_nx - integer :: i_ny, j_ny, k_ny, l_ny - integer :: i_nz, j_nz, k_nz, l_nz - character(len=8) :: i_nt, j_nt, k_nt, l_nt - integer :: ia_gto, jb_gto, kg_gto, ld_gto - real(double) :: ai, aj, ak, al, di, dj, dk, dl - real(double) :: i_norm, j_norm, k_norm, l_norm - !real(double) :: xi, xj, xk, xl - !real(double) :: yi, yj, yk, yl - !real(double) :: zi, zj, zk, zl - - real(double) :: eri_gto, eri_pao, test + integer :: nsf_kg, nsf_ld, nsf_jb, count ! dr = grid_spacing dv = dr**3 @@ -1460,12 +1442,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! nbbeg = nb_nd_kbeg ! - !print*, 'size jbnab2ch', size(jbnab2ch) - !print*, 'jbnab2ch', jbnab2ch - !print* - ! - ! The current state of count - k_count = (k - 1) * (nbnab(k_in_part) - 1) !!$ !!$ ****[ l do loop ]**** !!$ @@ -1484,27 +1460,10 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call exx_phi_on_grid(inode,ld%global_num,ld%spec,extent, & ld%xyz,ld%nsup,phi_l,r_int,xyz_zero) ! - !xl = ld%xyz_cv(1) - !yl = ld%xyz_cv(2) - !zl = ld%xyz_cv(3) - ! - ! The current state of count - ! l_count = (k - 1) * nbnab(k_in_part) * ld%nsup + & - ! (l - 1) * ld%nsup - l_count = k_count + (l - 1) - l_count = l_count * (ld%nsup - 1) - ! ld_loop: do nsf_ld = 1, ld%nsup ! nbaddr = nbbeg + kg%nsup * (nsf_ld - 1) ! - ! The current state of count - ! count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup + & - ! (l - 1) * ld%nsup * kg%nsup + & - ! (nsf_ld - 1) * kg%nsup - ld_count = l_count + (nsf_ld - 1) - ld_count = ld_count * (kg%nsup - 1) - ! kg_loop: do nsf_kg = 1, kg%nsup ! if ( backup_eris ) then @@ -1513,14 +1472,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & K_val = b(nbaddr+nsf_kg-1) end if ! - ! The current state of count - ! kg_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) + & - ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) + & - ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) + & - ! (nsf_kg - 1) * at%n_hnab(k_in_halo) - kg_count = ld_count + (nsf_kg - 1) - kg_count = kg_count * (at%n_hnab(k_in_halo) - 1) - ! !!$ !!$ ****[ i loop ]**** !!$ @@ -1530,42 +1481,21 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ni = bundle%iprim_seq (i_in_prim) np = bundle%iprim_part(i_in_prim) icad = (i_in_prim - 1) * chalo%ni_in_halo !*** - !nbbeg = nb_nd_kbeg ! call get_iprimdat(ia,kg,ni,i_in_prim,np,.true.,unit_exx_debug) ! - !print*, 'i',i, 'global_num',ia%ip,'spe',ia%spec, 'nsup', ia%nsup - ! if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i_1d_buffer','alloc') phi_i(1:2*extent+1, 1:2*extent+1, 1:2*extent+1, 1:ia%nsup) => phi_i_1d_buffer ! call exx_phi_on_grid(inode,ia%ip,ia%spec,extent, & ia%xyz,ia%nsup,phi_i,r_int,xyz_zero) ! - !xi = ia%xyz_ip(1) - !yi = ia%xyz_ip(2) - !zi = ia%xyz_ip(3) - - !print*, size(chalo%i_h2d), shape(chalo%i_h2d) - ! - ! The current state of count - ! i_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) + & - ! (i - 1) * nbnab(k_in_part) - i_count = kg_count + (i - 1) - i_count = i_count * (nbnab(k_in_part) - 1) - ! !!$ !!$ ****[ j loop ]**** !!$ - j_loop: do j = 1, nbnab(k_in_part)!mat(np,Xrange)%n_nab(ni) - !nbbeg = nb_nd_kbeg + j_loop: do j = 1, nbnab(k_in_part) j_in_halo = jbnab2ch(j) !*** ! - !print*, j, icad, j_in_halo - ! if ( j_in_halo /= 0 ) then ! ncbeg = chalo%i_h2d(icad + j_in_halo) !*** @@ -1577,9 +1507,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call get_halodat(jb,kg,jseq,chalo%i_hbeg(jpart), & BCS_parts%lab_cell(BCS_parts%inv_lab_cover(jpart)), & 'j',.true.,unit_exx_debug) - ! - !print*, 'j',j, 'global_num',jb%global_num,'spe',jb%spec,'nsup', jb%nsup - ! if ( exx_alloc ) call exx_mem_alloc(extent,jb%nsup,0,'phi_j','alloc') ! @@ -1588,20 +1515,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','alloc') ! - !xj = jb%xyz_cv(1) - !yj = jb%xyz_cv(2) - !zj = jb%xyz_cv(3) - ! - ! The current state of count - ! j_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & - ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & - ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & - ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup + & - ! (i - 1) * nbnab(k_in_part) * jb%nsup + & - ! (j - 1) * jb%nsup + & - j_count = i_count + (j - 1) - j_count = j_count * (jb%nsup - 1) - ! ! TODO include bounds in Ome_kj_1d_buffer and store_eris Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer ! @@ -1611,18 +1524,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! jb_loop: do nsf_jb = 1, jb%nsup ! - ! The current state of count - ! jb_count = (k - 1) * nbnab(k_in_part) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (l - 1) * ld%nsup * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (nsf_ld - 1) * kg%nsup * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (nsf_kg - 1) * at%n_hnab(k_in_halo) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (i - 1) * nbnab(k_in_part) * jb%nsup * ia%nsup + & - ! (j - 1) * jb%nsup * ia%nsup + & - ! (nsf_jb - 1) * ia%nsup - jb_count = j_count + (nsf_jb - 1) - jb_count = jb_count * (ia%nsup - 1) - ! - call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, kpart, dv, K_val, & + call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, nsf_kg, dv, K_val, & ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & backup_eris, store_eris_inner) ! From 2876497d3eed3d5dbc8f81c6e3cd6a8311e4ef0a Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Fri, 17 May 2024 13:39:32 +0100 Subject: [PATCH 16/19] Add mac homebrew system.make --- src/system/system.mac.make | 56 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 src/system/system.mac.make diff --git a/src/system/system.mac.make b/src/system/system.mac.make new file mode 100644 index 000000000..99a0e1434 --- /dev/null +++ b/src/system/system.mac.make @@ -0,0 +1,56 @@ +# This is an example system-specific makefile. You will need to adjust +# it for the actual system you are running on. + +# Set compilers +FC=/opt/homebrew/bin/mpifort + +# OpenMP flags +# Set this to "OMPFLAGS= " if compiling without openmp +# Set this to "OMPFLAGS= -fopenmp" if compiling with openmp +OMPFLAGS= -fopenmp +# Set this to "OMP_DUMMY = DUMMY" if compiling without openmp +# Set this to "OMP_DUMMY = " if compiling with openmp +OMP_DUMMY = + +# Set BLAS and LAPACK libraries +# MacOS X +# BLAS= -lvecLibFort +# Intel MKL use the Intel tool +# Generic +BLAS= -llapack -lblas +# Full scalapack library call; remove -lscalapack if using dummy diag module. +# If using OpenMPI, use -lscalapack-openmpi instead. +# If using Cray-libsci, use -llibsci_cray_mpi instead. +SCALAPACK = -lscalapack + +# LibXC: choose between LibXC compatibility below or Conquest XC library + +# Conquest XC library +#XC_LIBRARY = CQ +#XC_LIB = +#XC_COMPFLAGS = + +# LibXC compatibility +# Choose LibXC version: v4 (deprecated) or v5/6 (v5 and v6 have the same interface) +#XC_LIBRARY = LibXC_v4 +XC_LIBRARY = LibXC_v5 +XC_LIB = -lxcf90 -lxc +XC_COMPFLAGS = -I/opt/homebrew/Cellar/libxc/6.2.2/include + +# Set FFT library +FFT_LIB=-lfftw3 +FFT_OBJ=fft_fftw3.o + +LIBS= $(FFT_LIB) $(XC_LIB) $(SCALAPACK) $(BLAS) + +# Compilation flags +# NB for gcc10 you need to add -fallow-argument-mismatch +COMPFLAGS= -fallow-argument-mismatch -O3 $(OMPFLAGS) $(XC_COMPFLAGS) -I/opt/homebrew/Cellar/openblas/0.3.27/include -I/opt/homebrew/Cellar/lapack/3.12.0/include -I/opt/homebrew/Cellar/fftw/3.3.10_1/include + +# Linking flags +LINKFLAGS= $(OMPFLAGS) -L/opt/homebrew/Cellar/openblas/0.3.27/lib -L/opt/homebrew/Cellar/lapack/3.12.0/lib -L/opt/homebrew/Cellar/fftw/3.3.10_1/lib -L/opt/homebrew/Cellar/libxc/6.2.2/lib -L/opt/homebrew/Cellar/scalapack/2.2.0_1/lib + +# Matrix multiplication kernel type +MULT_KERN = default +# Use dummy DiagModule or not +DIAG_DUMMY = From 0a1372b894211b714ffebc248b97c90d1392270e Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Fri, 17 May 2024 17:29:27 +0100 Subject: [PATCH 17/19] Tidy up cri omp call and add omp parallel to eri kernel --- src/exx_kernel_default.f90 | 44 ++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index c71c4069e..f42aa7430 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -882,8 +882,10 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) ! !call io_close(unit_eri_debug) !call io_close(unit_exx_debug) - call io_close(unit_memory_write) - call io_close(unit_timers_write) + if (fdf_boolean('IO.WriteOutToFile',.true.)) then + call io_close(unit_memory_write) + call io_close(unit_timers_write) + end if ! ! end if @@ -1086,7 +1088,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & type(neigh_atomic_data) :: kg !k_gamma type(neigh_atomic_data) :: ld !l_delta ! - integer :: maxsuppfuncs, nsf_kg, nsf_ld + integer :: maxsuppfuncs, nsf_kg, nsf_ld, nsf_jb integer :: r, s, t, count ! ! @@ -1246,18 +1248,16 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! Begin the parallel region here as earlier allocations make it difficult to do before now. ! However, this should be possible in future work. ! - !$omp parallel default(none) reduction(+: c) & - !$omp shared(kg,jb,tmr_std_exx_poisson,tmr_std_exx_nsup,Phy_k,phi_j,phi_k,ncbeg,ia,kpart, & - !$omp tmr_std_exx_matmult,ewald_pot,phi_i,exx_psolver,exx_pscheme,extent,dv, & - !$omp ewald_rho,inode,pulay_radius,p_omega,p_gauss,w_gauss,reckernel_3d,r_int) & - !$omp private(nsf_kg,nsf_ld,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer,Ome_kj, & - !$omp ncaddr,exx_mat_elem,r,s,t) + !$omp parallel default(none) reduction(+: c) & + !$omp shared(kg,jb,Phy_k,ncbeg,ia,phi_i,extent,dv) & + !$omp private(nsf_kg,nsf_jb,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer, & + !$omp Ome_kj,ncaddr) Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer !$omp do schedule(runtime) collapse(2) do nsf_kg = 1, kg%nsup - do nsf_ld = 1, jb%nsup + do nsf_jb = 1, jb%nsup ! - call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_ld, nsf_kg, dv, 1.0d0, & + call cri_eri_inner_calculation(Phy_k, phi_i, Ome_kj, nsf_kg, nsf_jb, nsf_kg, dv, 1.0d0, & ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & .false.) ! @@ -1345,7 +1345,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ewald_charge, ewald_rho, ewald_pot, & pulay_radius, p_omega, p_ngauss, p_gauss, w_gauss, & exx_psolver,exx_pscheme, & - unit_exx_debug, unit_eri_debug + unit_exx_debug, unit_eri_debug, tmr_std_exx_nsup ! use exx_types, only: phi_i_1d_buffer, phi_j, phi_k, phi_l, & Ome_kj_1d_buffer, work_in_3d, work_out_3d, eris @@ -1515,13 +1515,19 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','alloc') ! - ! TODO include bounds in Ome_kj_1d_buffer and store_eris - Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer - ! ! Point at the next block of eris to store and update counter store_eris_inner(1:ia%nsup, 1:jb%nsup) => eris(kpart)%store_eris(count+1:count + (jb%nsup * ia%nsup)) count = count + (jb%nsup * ia%nsup) ! + call start_timer(tmr_std_exx_nsup) + ! + !$omp parallel default(none) reduction(+: c,store_eris_inner) & + !$omp shared(jb,ncbeg,ia,phi_l,phi_i,extent,dv,eris,K_val,backup_eris) & + !$omp private(nsf_kg,nsf_ld,nsf_jb,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer, & + !$omp Ome_kj,ncaddr) + ! TODO include bounds in Ome_kj_1d_buffer and store_eris + Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer + !$omp do schedule(runtime) jb_loop: do nsf_jb = 1, jb%nsup ! call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, nsf_kg, dv, K_val, & @@ -1530,13 +1536,15 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! end do jb_loop ! + call stop_timer(tmr_std_exx_nsup,.true.) + ! + if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','dealloc') + if ( exx_alloc ) call exx_mem_alloc(extent,jb%nsup,0,'phi_j','dealloc') + ! end if ! end if ! - if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','dealloc') - if ( exx_alloc ) call exx_mem_alloc(extent,jb%nsup,0,'phi_j','dealloc') - ! !!$ !!$ ****[ j end loop ]**** !!$ From ec4f1d9ea98a693fc8723e852ad47498ddf3bd81 Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Mon, 20 May 2024 17:26:53 +0100 Subject: [PATCH 18/19] WIP add omp threading tojb_loop for ei kernel --- src/exx_kernel_default.f90 | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index f42aa7430..d760ccf92 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -909,7 +909,7 @@ end subroutine get_X_matrix ! TODO: Change name to something more descriptive subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, nsf_kg, dv, & multiplier, ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, & - c, backup_eris, store_eris_inner) + c, backup_eris, store_eris_ptr) use exx_poisson, only: exx_v_on_grid, exx_ewald_charge @@ -930,7 +930,7 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, nsf_ real(double), intent(inout) :: c(:) ! Backup eris parameters. Optional as they are only needed by eri function logical, intent(in) :: backup_eris - real(double), pointer, intent(inout), OPTIONAL :: store_eris_inner(:,:) + real(double), pointer, intent(inout), OPTIONAL :: store_eris_ptr(:,:) integer :: ncaddr, nsf3 real(double) :: exx_mat_elem @@ -957,13 +957,13 @@ subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, nsf_ ! do nsf3 = 1, ia_nsup ! - ! Can we instead always store directly into store_eris_inner(nsf2, nsf3)? + ! Can we instead always store directly into store_eris_ptr(nsf2, nsf3)? exx_mat_elem = dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv * multiplier ! if ( backup_eris ) then ! ! eris(kpart)%store_eris( count ) = exx_mat_elem - store_eris_inner(nsf3, nsf2) = exx_mat_elem + store_eris_ptr(nsf3, nsf2) = exx_mat_elem ! else ! @@ -1394,7 +1394,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & real(double) :: dr,dv,K_val ! ! We allocate pointers here to point at 1D arrays later and allow contiguous access when passing to BLAS dot later - real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:), store_eris_inner(:,:) + real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:), store_eris_ptr(:,:) ! type(prim_atomic_data) :: ia !i_alpha type(neigh_atomic_data) :: jb !j_beta @@ -1515,16 +1515,17 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','alloc') ! + call start_timer(tmr_std_exx_nsup) + ! ! Point at the next block of eris to store and update counter - store_eris_inner(1:ia%nsup, 1:jb%nsup) => eris(kpart)%store_eris(count+1:count + (jb%nsup * ia%nsup)) + store_eris_ptr(1:ia%nsup, 1:jb%nsup) => eris(kpart)%store_eris(count+1:count + (jb%nsup * ia%nsup)) count = count + (jb%nsup * ia%nsup) ! - call start_timer(tmr_std_exx_nsup) + !$omp parallel default(none) reduction(+: c) & + !$omp shared(nsf_kg,nsf_ld,jb,ncbeg,ia,phi_k,phi_j,phi_l,phi_i,extent,dv,eris,K_val,backup_eris, & + !$omp phi_i_1d_buffer,kpart,store_eris_ptr) & + !$omp private(nsf_jb,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer,Ome_kj,ncaddr) ! - !$omp parallel default(none) reduction(+: c,store_eris_inner) & - !$omp shared(jb,ncbeg,ia,phi_l,phi_i,extent,dv,eris,K_val,backup_eris) & - !$omp private(nsf_kg,nsf_ld,nsf_jb,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer, & - !$omp Ome_kj,ncaddr) ! TODO include bounds in Ome_kj_1d_buffer and store_eris Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer !$omp do schedule(runtime) @@ -1532,9 +1533,11 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! call cri_eri_inner_calculation(phi_l, phi_i, Ome_kj, nsf_ld, nsf_jb, nsf_kg, dv, K_val, & ncaddr, ncbeg, ia%nsup, ewald_charge, work_out_3d, work_in_3d, c, & - backup_eris, store_eris_inner) + backup_eris, store_eris_ptr) ! end do jb_loop + !$omp end do + !$omp end parallel ! call stop_timer(tmr_std_exx_nsup,.true.) ! From c0e36f63344157085449b796e06cd6d1137d9bcf Mon Sep 17 00:00:00 2001 From: Connor Aird Date: Tue, 21 May 2024 17:13:51 +0100 Subject: [PATCH 19/19] Remove unused variables and comments --- src/exx_kernel_default.f90 | 376 +++++++------------------------------ 1 file changed, 69 insertions(+), 307 deletions(-) diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index d760ccf92..e2e29f55c 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -103,7 +103,7 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) use GenComms, only: my_barrier, cq_abort, mtime use multiply_module, only: prefetch ! - use matrix_data, only: mat, Hrange, Srange, Xrange, SXrange, rcut + use matrix_data, only: Hrange, Srange, Xrange, SXrange use mult_module, only: S_X_SX, mat_p, mult use mult_module, only: matX, matK, matrix_scale, matrix_trace use mult_module, only: matrix_product_trace, matrix_product_trace_length @@ -130,16 +130,13 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) file_exx_debug, file_eri_debug, & file_eri_filter_debug, sum_eri_gto - use exx_types, only: exx_alloc - - use exx_types, only: grid_spacing, r_int, extent, & + use exx_types, only: exx_alloc, r_int, extent, & ewald_alpha, ewald_rho, ewald_pot, & pulay_radius, p_omega, p_ngauss, p_gauss, w_gauss, & exx_psolver,exx_pscheme, kernel, & exx_total_time, eris, exx_filter, exx_gto ! use exx_poisson, only: exx_scal_rho_3d, exx_ewald_rho, exx_ewald_pot - use exx_types, only: isf_order, ngrid ! use input_module, only: fdf_boolean ! @@ -180,7 +177,7 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) integer, dimension(MPI_STATUS_SIZE) :: mpi_stat integer :: offset,sends,i,j - logical :: get_exx, exist + logical :: get_exx real(double) :: t0,t1 integer :: maxsuppfuncs, nb_eris @@ -191,7 +188,7 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) ! !type(store_eris), dimension(:), allocatable :: eris ! - real(double) :: xyz_ghost(3), r_ghost, t0_test, t1_test + real(double) :: xyz_ghost(3), r_ghost !character(len=20) :: filename3, filename4, filename5, filename6 type(cq_timer) :: backtrace_timer @@ -556,41 +553,27 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) call m_kern_exx_cri( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length) - !print*, ' mat X ', shape( mat_p(matX( exxspin ))%matrix) - !print*, ' mat K ', shape( mat_p(matK( exxspin ))%matrix) - else if ( scheme == 2 ) then if (iprint_exx > 5) write(io_lun,*) 'Proc :', myid, & 'EXX: performing on-the-fly ERI calculation on kpart =', kpart - !call cpu_time(t0_test) - call m_kern_exx_eri( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& - ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & + ibpart_rem,ibseq_rem, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length, backup_eris) - !call cpu_time(t1_test) - !write(*,*) 'time =', t1_test - t0_test - else if (scheme == 3 ) then if ( niter == 1 ) then @@ -600,28 +583,17 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) if (iprint_exx > 5) write(io_lun,*) 'Proc :', myid, & 'EXX: preparing store ERI calculation on kpart =', kpart ! - !if( myid==0 ) write(io_lun,*) 'EXX: rcut(Xrange) = ', rcut(Xrange) - !if( myid==0 ) write(io_lun,*) 'EXX: rcut(SXrange) = ', rcut(SXrange) - !if( myid==0 ) write(io_lun,*) 'EXX: rcut(Hrange) = ', rcut(Hrange) - !if( myid==0 ) write(io_lun,*) 'EXX: rcut(Srange) = ', rcut(Srange) - !if( myid==0 ) write(io_lun,*) 'EXX: rcut(SXrange) = ', rcut(SXrange) - ! ! First dummy call to get the number of ERIs on each proc ! call m_kern_exx_dummy( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& - ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & + ibpart_rem,ibseq_rem, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length, nb_eris, get_exx, .false. ) - !if (iprint_exx > 5 .and. inode == ionode) write(io_lun,*) & - ! 'EXX: nb. or ERIs to allocate =', nb_eris if (iprint_exx > 5) write(io_lun,*) 'Proc :', myid, & 'EXX: allocate ERIs on kpart =', kpart @@ -651,53 +623,38 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) 'EXX: setup filtering on kpart =', kpart ! call m_kern_exx_dummy( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& - ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & + ibpart_rem,ibseq_rem, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length, nb_eris, get_exx, exx_filter ) end if ! - ! - ! should be a single call not embeded in the kpart loop... sorry for that - ! call fft3_init_wrapper( 2*extent+1 ) - ! if (iprint_exx > 5) write(io_lun,*) 'Proc :', myid, & 'EXX: compute and store ERIs on kpart =', kpart ! - !call fft3_init_wrapper( 2*extent+1 ) - ! ! Third call to compute and store ERIs ! if ( exx_gto ) then call m_kern_exx_eri_gto( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& - ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & + ibpart_rem,ibseq_rem, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length, backup_eris) else call m_kern_exx_eri( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& - ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & + ibpart_rem,ibseq_rem, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length, backup_eris) end if @@ -710,14 +667,11 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) 'EXX: use stored ERIs to get X on kpart =', kpart call m_kern_exx_dummy( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& - ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & + ibpart_rem,ibseq_rem, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length, nb_eris, get_exx, .false. ) end if @@ -730,14 +684,11 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) 'EXX: dummy calculation on kpart =', kpart call m_kern_exx_dummy( k_off,kpart,ib_nd_acc_rem,ibind_rem,nbnab_rem,& - ibpart_rem,ibseq_rem,ibndimj_rem, & - !atrans, & + ibpart_rem,ibseq_rem, & b_rem, & mat_p(matX( exxspin ))%matrix, & mult(S_X_SX)%ahalo,mult(S_X_SX)%chalo,mult(S_X_SX)%ltrans, & mult(S_X_SX)%bmat( exxspin )%mx_abs,mult(S_X_SX)%parts%mx_mem_grp, & - mult(S_X_SX)%prim%mx_iprim, & - !lena, & lenb_rem, & mat_p(matX( exxspin ))%length, nb_eris, get_exx, .false. ) @@ -745,10 +696,6 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) end do ! End of the kpart=1,ahalo%np_in_halo loop ! ! - ! #ifdef OMP_M - ! !$omp end do - ! !$omp end parallel - ! #end if ! call start_timer(tmr_std_exx_dealloc) if(allocated(b_rem)) deallocate(b_rem) @@ -786,16 +733,6 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) ! call stop_timer(tmr_std_exx_kernel,.true.) ! - !exx = matrix_product_trace(matX(1),matK(1)) - !if (inode == ionode) then - ! print*, 'exx energy', -exx*1.d0/2.d0*0.25 - !end if - ! - !if ( exx_debug ) then - !call exx_write_tail(unit1,inode) - !end if - ! - ! if ( scheme > 0 ) then call exx_mem_alloc(extent,0,0,'work_3d' ,'dealloc') end if @@ -875,13 +812,7 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) write(unit=unit_timers_write,fmt='("Timing: Proc ",i6,": Time spent in ", a50, " = ", & &f12.5," s")') inode, 'timer calls', tmr_std_exx%t_tot-exx_total_time write(unit_timers_write,*) - !call io_close(unit_matrix_write) - - !call io_close(unit_output_write) - !call io_close(unit_screen_write) ! - !call io_close(unit_eri_debug) - !call io_close(unit_exx_debug) if (fdf_boolean('IO.WriteOutToFile',.true.)) then call io_close(unit_memory_write) call io_close(unit_timers_write) @@ -890,8 +821,6 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) ! end if - !close(200) - if ( exx_debug ) then call io_close(unit_eri_debug) call io_close(unit_exx_debug) @@ -907,6 +836,26 @@ end subroutine get_X_matrix ! ! To ensure thread safety, variables which are altered must be passed in as parameters rather than imported. ! TODO: Change name to something more descriptive + !!****f* exx_kernel_default/m_kern_exx_cri * + !! + !! NAME + !! cri_eri_inner_calculation + !! + !! PURPOSE + !! Deduplicate the inner calculations of m_kern_exx_cri and m_kern_exx_eri + !! + !! INPUTS + !! + !! AUTHOR + !! Connor Aird + !! + !! CREATION DATE + !! 2024/05/21 + !! + !! MODIFICATION HISTORY + !! + !! SOURCE + !! subroutine cri_eri_inner_calculation(nsf1_array, phi_i, Ome_kj, nsf1, nsf2, nsf_kg, dv, & multiplier, ncaddr, ncbeg, ia_nsup, ewald_charge, work_out_3d, work_in_3d, & c, backup_eris, store_eris_ptr) @@ -997,7 +946,7 @@ end subroutine cri_eri_inner_calculation !! subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ibpart, ibseq, bndim2, b, c, ahalo, chalo, at, mx_absb, mx_part, & - mx_iprim, lenb, lenc ) + lenb, lenc ) use numbers, only: zero, one use matrix_module, only: matrix_halo, matrix_trans @@ -1014,17 +963,10 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! use exx_evalpao, only: exx_phi_on_grid ! - use exx_types, only: prim_atomic_data, neigh_atomic_data, & - tmr_std_exx_accumul, tmr_std_exx_matmult, tmr_std_exx_poisson, & - grid_spacing, r_int, extent, ewald_charge, ewald_rho, ewald_pot,& - pulay_radius, p_omega, p_ngauss, p_gauss, w_gauss, & - exx_psolver, exx_pscheme, & - unit_exx_debug, tmr_std_exx_nsup - ! - use exx_types, only: phi_i_1d_buffer, phi_j, phi_k, phi_l, & - Phy_k, Ome_kj_1d_buffer, & - work_in_3d, work_out_3d - use exx_types, only: exx_alloc + use exx_types, only: prim_atomic_data, neigh_atomic_data,tmr_std_exx_accumul, & + grid_spacing, r_int, extent, ewald_charge,p_ngauss,unit_exx_debug, & + tmr_std_exx_nsup,phi_i_1d_buffer, phi_j, phi_k, phi_l,Phy_k, & + Ome_kj_1d_buffer,work_in_3d,work_out_3d,exx_alloc ! use exx_memory, only: exx_mem_alloc use exx_poisson, only: exx_v_on_grid, exx_ewald_charge @@ -1036,7 +978,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! Passed variables type(matrix_halo), intent(in) :: ahalo, chalo type(matrix_trans), intent(in) :: at - integer, intent(in) :: mx_absb, mx_part, mx_iprim, lenb, lenc + integer, intent(in) :: mx_absb, mx_part, lenb, lenc integer, intent(in) :: kpart, k_off real(double), intent(in) :: b(lenb) real(double), intent(inout) :: c(lenc) @@ -1048,21 +990,6 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer(integ), intent(in) :: ibpart(mx_part*mx_absb) integer(integ), intent(in) :: ibseq(mx_part*mx_absb) integer(integ), intent(in) :: bndim2(mx_part*mx_absb) - -!!$ type(matrix_halo) :: ahalo, chalo -!!$ type(matrix_trans) :: at -!!$ integer :: mx_absb, mx_part, mx_iprim, lenb, lenc -!!$ integer :: kpart, k_off -!!$ real(double) :: b(lenb) -!!$ real(double) :: c(lenc) -!!$ ! -!!$ ! Remote indices -!!$ integer(integ) :: ib_nd_acc(mx_part) -!!$ integer(integ) :: ibaddr(mx_part) -!!$ integer(integ) :: nbnab(mx_part) -!!$ integer(integ) :: ibpart(mx_part*mx_absb) -!!$ integer(integ) :: ibseq(mx_part*mx_absb) -!!$ integer(integ) :: bndim2(mx_part*mx_absb) ! ! Local variables integer :: jbnab2ch(mx_absb) ! Automatic array @@ -1071,13 +998,11 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer :: nb_nd_kbeg integer :: nd1, nd3 integer :: nbaddr, ncaddr - integer :: lbnab2ch(mx_absb) ! Automatic array integer :: l, lseq, lpart integer :: np, ni ! real(double), dimension(3) :: xyz_zero = zero real(double) :: dr,dv,K_val - real(double) :: exx_mat_elem ! ! ! We allocate pointers here to point at 1D arrays later and allow contiguous access when passing to BLAS dot later @@ -1088,17 +1013,12 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & type(neigh_atomic_data) :: kg !k_gamma type(neigh_atomic_data) :: ld !l_delta ! - integer :: maxsuppfuncs, nsf_kg, nsf_ld, nsf_jb - integer :: r, s, t, count + integer :: maxsuppfuncs, nsf_kg, nsf_ld, nsf_jb, count ! ! dr = grid_spacing dv = dr**3 - !ewald_alpha = 0.5 maxsuppfuncs = maxval(nsf_species) - !range_ij = 0.5d0 - !range_kl = 0.5d0 - !unit_exx_debug1 = 333 ! count = 1 ! @@ -1115,28 +1035,20 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call get_halodat(kg,kg,k_in_part,ahalo%i_hbeg(ahalo%lab_hcover(kpart)), & ahalo%lab_hcell(kpart),'k',.true.,unit_exx_debug) ! - !print*, 'k',k, 'global_num',kg%global_num,'spe',kg%spec, kg%xyz - ! if ( exx_alloc ) call exx_mem_alloc(extent,kg%nsup,0,'phi_k','alloc') call exx_phi_on_grid(inode,kg%global_num,kg%spec,extent, & xyz_zero,kg%nsup,phi_k,r_int,xyz_zero) ! jbnab2ch = 0 - !print*, 'nbnab: ',nbnab(k_in_part),k_in_part do j = 1, nbnab(k_in_part) jpart = ibpart(nbkbeg+j-1) + k_off jseq = ibseq (nbkbeg+j-1) jbnab2ch(j) = chalo%i_halo(chalo%i_hbeg(jpart)+jseq-1) - !print*, 'jbnab2ch',j, jbnab2ch(j) end do ! nbbeg = nb_nd_kbeg ! - !print*, 'size jbnab2ch', size(jbnab2ch) - !print*, 'jbnab2ch', jbnab2ch - !print* - ! if ( exx_alloc ) call exx_mem_alloc(extent,kg%nsup,0,'Phy_k','alloc') ! Phy_k = zero @@ -1152,14 +1064,9 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & BCS_parts%lab_cell(BCS_parts%inv_lab_cover(lpart)), & 'l',.true.,unit_exx_debug) ! - !write(*,*) 'l',l, 'global_num',ld%global_num,'spe',ld%spec - ! !!$ !!$ ****[ screening ]**** !!$ - !xyz_kl = kg%xyz - ld%xyz - !screen_kl = sqrt(dot_product(xyz_kl,xyz_kl)) - !if ( screen_kl < range_kl ) then ! if ( exx_alloc ) call exx_mem_alloc(extent,ld%nsup,0,'phi_l','alloc') ! @@ -1181,11 +1088,10 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & end do end do ! - nbbeg = nbbeg + ld%nsup*kg%nsup !nd3 * nd2 + nbbeg = nbbeg + ld%nsup*kg%nsup ! if ( exx_alloc ) call exx_mem_alloc(extent,ld%nsup,0,'phi_l','dealloc') - !end if !( screen kl ) end do ! End of l = 1, nbnab(k_in_part) !!$ !!$ ****[ i loop ]**** @@ -1196,32 +1102,23 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ni = bundle%iprim_seq (i_in_prim) np = bundle%iprim_part(i_in_prim) icad = (i_in_prim - 1) * chalo%ni_in_halo !*** - !nbbeg = nb_nd_kbeg - ! - !print*, 'i_in_prim', i_in_prim, 'nd1', nd1, 'ni', ni, 'np', np ! call get_iprimdat(ia,kg,ni,i_in_prim,np,.true.,unit_exx_debug) if (ia%nsup/=ahalo%ndimi(i_in_prim)) call cq_abort('Error1: ',ia%nsup,ahalo%ndimi(i_in_prim)) ! - !print*, 'i',i, 'global_num',ia%ip,'spe',ia%spec - ! if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i_1d_buffer','alloc') phi_i(1:2*extent+1, 1:2*extent+1, 1:2*extent+1, 1:ia%nsup) => phi_i_1d_buffer ! call exx_phi_on_grid(inode,ia%ip,ia%spec,extent, & ia%xyz,ia%nsup,phi_i,r_int,xyz_zero) ! - !print*, size(chalo%i_h2d), shape(chalo%i_h2d) - ! !!$ !!$ ****[ j loop ]**** !!$ - do j = 1, nbnab(k_in_part)!mat(np,Xrange)%n_nab(ni) + do j = 1, nbnab(k_in_part) nbbeg = nb_nd_kbeg j_in_halo = jbnab2ch(j) !*** ! - !print*, j, icad, j_in_halo - ! if ( j_in_halo /= 0 ) then ! ncbeg = chalo%i_h2d(icad + j_in_halo) !*** @@ -1320,8 +1217,8 @@ end subroutine m_kern_exx_cri !! SOURCE !! subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & - ibpart, ibseq, bndim2, b, c, ahalo, chalo, & - at, mx_absb, mx_part, mx_iprim, lenb, lenc, backup_eris ) + ibpart, ibseq, b, c, ahalo, chalo, & + at, mx_absb, mx_part, lenb, lenc, backup_eris ) use numbers, only: zero, one, pi use matrix_module, only: matrix_halo, matrix_trans @@ -1333,23 +1230,14 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & use mult_module, only: return_matrix_value, S_X_SX use cover_module, only: BCS_parts ! - use gto_format_new, only: gto - ! use exx_evalpao, only: exx_phi_on_grid ! use exx_evalgto, only: exx_gto_on_grid_prim ! - use exx_types, only: prim_atomic_data, neigh_atomic_data, & - tmr_std_exx_accumul, tmr_std_exx_poisson, & - tmr_std_exx_poisson, grid_spacing, r_int, extent,& - ewald_charge, ewald_rho, ewald_pot, & - pulay_radius, p_omega, p_ngauss, p_gauss, w_gauss, & - exx_psolver,exx_pscheme, & - unit_exx_debug, unit_eri_debug, tmr_std_exx_nsup - ! - use exx_types, only: phi_i_1d_buffer, phi_j, phi_k, phi_l, & - Ome_kj_1d_buffer, work_in_3d, work_out_3d, eris - use exx_types, only: exx_alloc + use exx_types, only: prim_atomic_data,neigh_atomic_data,grid_spacing,r_int, & + extent,ewald_charge,p_ngauss,unit_exx_debug,tmr_std_exx_nsup,eris, & + phi_i_1d_buffer,phi_j,phi_k,phi_l,exx_alloc, & + Ome_kj_1d_buffer,work_in_3d,work_out_3d ! use exx_memory, only: exx_mem_alloc ! @@ -1364,7 +1252,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! Passed variables type(matrix_halo), intent(in) :: ahalo, chalo type(matrix_trans), intent(in) :: at - integer, intent(in) :: mx_absb, mx_part, mx_iprim, lenb, lenc + integer, intent(in) :: mx_absb, mx_part, lenb, lenc integer, intent(in) :: kpart, k_off real(double), intent(in) :: b(lenb) real(double), intent(inout) :: c(lenc) @@ -1376,7 +1264,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer(integ), intent(in) :: nbnab(mx_part) integer(integ), intent(in) :: ibpart(mx_part*mx_absb) integer(integ), intent(in) :: ibseq(mx_part*mx_absb) - integer(integ), intent(in) :: bndim2(mx_part*mx_absb) ! ! Local variables integer :: jbnab2ch(mx_absb) ! Automatic array @@ -1385,7 +1272,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer :: nb_nd_kbeg integer :: nd1, nd3 integer :: nbaddr, ncaddr - integer :: lbnab2ch(mx_absb) ! Automatic array integer :: l, lseq, lpart integer :: np, ni ! @@ -1427,17 +1313,11 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call exx_phi_on_grid(inode,kg%global_num,kg%spec,extent, & kg%xyz,kg%nsup,phi_k,r_int,xyz_zero) ! - !xk = kg%xyz_cv(1) - !yk = kg%xyz_cv(2) - !zk = kg%xyz_cv(3) - ! jbnab2ch = 0 - !print*, 'nbnab: ',nbnab(k_in_part),k_in_part do j = 1, nbnab(k_in_part) jpart = ibpart(nbkbeg+j-1) + k_off jseq = ibseq (nbkbeg+j-1) jbnab2ch(j) = chalo%i_halo(chalo%i_hbeg(jpart)+jseq-1) - !print*, 'jbnab2ch',j, jbnab2ch(j) end do ! nbbeg = nb_nd_kbeg @@ -1453,8 +1333,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & BCS_parts%lab_cell(BCS_parts%inv_lab_cover(lpart)), & 'l',.true.,unit_exx_debug) ! - !write(*,*) 'l',l, 'global_num',ld%global_num,'spe',ld%spec, 'nsup', ld%nsup - ! if ( exx_alloc ) call exx_mem_alloc(extent,ld%nsup,0,'phi_l','alloc') ! call exx_phi_on_grid(inode,ld%global_num,ld%spec,extent, & @@ -1480,7 +1358,7 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & nd1 = ahalo%ndimi (i_in_prim) ni = bundle%iprim_seq (i_in_prim) np = bundle%iprim_part(i_in_prim) - icad = (i_in_prim - 1) * chalo%ni_in_halo !*** + icad = (i_in_prim - 1) * chalo%ni_in_halo ! call get_iprimdat(ia,kg,ni,i_in_prim,np,.true.,unit_exx_debug) ! @@ -1494,11 +1372,11 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !!$ ****[ j loop ]**** !!$ j_loop: do j = 1, nbnab(k_in_part) - j_in_halo = jbnab2ch(j) !*** + j_in_halo = jbnab2ch(j) ! if ( j_in_halo /= 0 ) then ! - ncbeg = chalo%i_h2d(icad + j_in_halo) !*** + ncbeg = chalo%i_h2d(icad + j_in_halo) ! if ( ncbeg /= 0 ) then jpart = ibpart(nbkbeg+j-1) + k_off @@ -1584,14 +1462,6 @@ subroutine m_kern_exx_eri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! end do k_loop ! -10 format(I8,X,2F16.10,X,A,2I4,A,2I4,A,4X,A,2I4,A,2I4,A,A,2A4,A,2A4,A,X,8F12.6) - - !write(*,*) 'PRINT' - !do i = 1, count - ! write(*,'(4X,I12,6F16.10)') i, pao_eris(i), pao_eris2(i), gto_eris(i), pao_eris(i) - gto_eris(i) - !end do - !write(*,'(4X,"sum",2F24.10)') sum( pao_eris ), sum(pao_eris2), sum( gto_eris ) - return end subroutine m_kern_exx_eri ! @@ -1618,8 +1488,8 @@ end subroutine m_kern_exx_eri !! SOURCE !! subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & - ibpart, ibseq, bndim2, b, c, ahalo, chalo, & - at, mx_absb, mx_part, mx_iprim, lenb, lenc, backup_eris ) + ibpart, ibseq, b, c, ahalo, chalo, & + at, mx_absb, mx_part, lenb, lenc, backup_eris ) use numbers, only: zero, one, pi use matrix_module, only: matrix_halo, matrix_trans @@ -1631,25 +1501,12 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & use mult_module, only: return_matrix_value, S_X_SX use cover_module, only: BCS_parts ! - use gto_format_new, only: gto - ! - use species_module, only: nsf_species - ! use exx_evalpao, only: exx_phi_on_grid ! use exx_evalgto, only: exx_gto_on_grid_prim ! - use exx_types, only: prim_atomic_data, neigh_atomic_data, & - tmr_std_exx_accumul, tmr_std_exx_poisson, & - tmr_std_exx_poisson, grid_spacing, r_int, extent,& - ewald_charge, ewald_rho, ewald_pot, eris, & - pulay_radius, p_omega, p_ngauss, p_gauss, w_gauss, & - exx_psolver,exx_pscheme, & - unit_exx_debug, unit_eri_debug, sum_eri_gto - ! - use exx_types, only: phi_i, phi_j, phi_k, phi_l, eris, & - work_in_3d, work_out_3d, exx_gto, exx_gto_poisson - use exx_types, only: exx_alloc + use exx_types, only: prim_atomic_data, neigh_atomic_data,eris, & + p_ngauss,unit_exx_debug, unit_eri_debug, sum_eri_gto ! use exx_memory, only: exx_mem_alloc ! @@ -1664,7 +1521,7 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! Passed variables type(matrix_halo), intent(in) :: ahalo, chalo type(matrix_trans), intent(in) :: at - integer, intent(in) :: mx_absb, mx_part, mx_iprim, lenb, lenc + integer, intent(in) :: mx_absb, mx_part, lenb, lenc integer, intent(in) :: kpart, k_off real(double), intent(in) :: b(lenb) real(double), intent(inout) :: c(lenc) @@ -1676,7 +1533,6 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer(integ), intent(in) :: nbnab(mx_part) integer(integ), intent(in) :: ibpart(mx_part*mx_absb) integer(integ), intent(in) :: ibseq(mx_part*mx_absb) - integer(integ), intent(in) :: bndim2(mx_part*mx_absb) ! ! Local variables integer :: jbnab2ch(mx_absb) ! Automatic array @@ -1685,11 +1541,9 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer :: nb_nd_kbeg integer :: nd1, nd3 integer :: nbaddr, ncaddr - integer :: lbnab2ch(mx_absb) ! Automatic array integer :: l, lseq, lpart integer :: np, ni ! - real(double), dimension(3) :: xyz_zero = zero ! type(prim_atomic_data) :: ia ! i_alpha type(neigh_atomic_data) :: jb ! j_beta @@ -1699,27 +1553,13 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer :: nsf_kg, nsf_ld, nsf_ia, nsf_jb ! ! GTO - integer :: i_nx, j_nx, k_nx, l_nx - integer :: i_ny, j_ny, k_ny, l_ny - integer :: i_nz, j_nz, k_nz, l_nz - integer :: i_n, j_n, k_n, l_n - character(len=8) :: i_nt, j_nt, k_nt, l_nt - integer :: ia_gto, jb_gto, kg_gto, ld_gto - real(double) :: ai, aj, ak, al, di, dj, dk, dl - real(double) :: i_norm, j_norm, k_norm, l_norm real(double) :: xi, xj, xk, xl real(double) :: yi, yj, yk, yl real(double) :: zi, zj, zk, zl - real(double) :: ci, cj, ck, cl - real(double) :: K_val - ! + real(double) :: K_val,eri_gto integer :: count - real(double) :: eri_gto, eri_pao, test - real(double) :: xyz_i_dummy(3), xyz_j_dummy(3), xyz_k_dummy(3), xyz_l_dummy(3) - - !TYPE(libint_t), DIMENSION(1) :: erieval ! count = 1 ! @@ -1736,39 +1576,29 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call get_halodat(kg,kg,k_in_part,ahalo%i_hbeg(ahalo%lab_hcover(kpart)), & ahalo%lab_hcell(kpart),'k',.true.,unit_exx_debug) ! - !print*, 'k',k, 'global_num',kg%global_num,'spe',kg%spec,'nsup', kg%nsup - ! xk = kg%xyz_cv(1) yk = kg%xyz_cv(2) zk = kg%xyz_cv(3) ! jbnab2ch = 0 - !print*, 'nbnab: ',nbnab(k_in_part),k_in_part do j = 1, nbnab(k_in_part) jpart = ibpart(nbkbeg+j-1) + k_off jseq = ibseq (nbkbeg+j-1) jbnab2ch(j) = chalo%i_halo(chalo%i_hbeg(jpart)+jseq-1) - !print*, 'jbnab2ch',j, jbnab2ch(j) end do ! nbbeg = nb_nd_kbeg ! - !print*, 'size jbnab2ch', size(jbnab2ch) - !print*, 'jbnab2ch', jbnab2ch - !print* !!$ !!$ ****[ l do loop ]**** !!$ l_loop: do l = 1, nbnab(k_in_part) - !l_in_halo = lbnab2ch(l) lpart = ibpart(nbkbeg+l-1) + k_off lseq = ibseq (nbkbeg+l-1) call get_halodat(ld,kg,lseq,chalo%i_hbeg(lpart), & BCS_parts%lab_cell(BCS_parts%inv_lab_cover(lpart)), & 'l',.true.,unit_exx_debug) ! - !write(*,*) 'l',l, 'global_num',ld%global_num,'spe',ld%spec, 'nsup', ld%nsup - ! xl = ld%xyz_cv(1) yl = ld%xyz_cv(2) zl = ld%xyz_cv(3) @@ -1779,11 +1609,7 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! kg_loop: do nsf_kg = 1, kg%nsup ! - !if ( backup_eris ) then - ! K_val = real(1,double) - !else K_val = b(nbaddr+nsf_kg-1) - !end if !!$ !!$ ****[ i loop ]**** !!$ @@ -1792,13 +1618,10 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & nd1 = ahalo%ndimi (i_in_prim) ni = bundle%iprim_seq (i_in_prim) np = bundle%iprim_part(i_in_prim) - icad = (i_in_prim - 1) * chalo%ni_in_halo !*** - !nbbeg = nb_nd_kbeg + icad = (i_in_prim - 1) * chalo%ni_in_halo ! call get_iprimdat(ia,kg,ni,i_in_prim,np,.true.,unit_exx_debug) ! - !print*, 'i',i, 'global_num',ia%ip,'spe',ia%spec, 'nsup', ia%nsup - ! xi = ia%xyz_ip(1) yi = ia%xyz_ip(2) zi = ia%xyz_ip(3) @@ -1807,13 +1630,11 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !!$ ****[ j loop ]**** !!$ j_loop: do j = 1, nbnab(k_in_part) - j_in_halo = jbnab2ch(j) !*** + j_in_halo = jbnab2ch(j) ! - !print*, j, icad, j_in_halo - ! if ( j_in_halo /= 0 ) then ! - ncbeg = chalo%i_h2d(icad + j_in_halo) !*** + ncbeg = chalo%i_h2d(icad + j_in_halo) ! if ( ncbeg /= 0 ) then jpart = ibpart(nbkbeg+j-1) + k_off @@ -1823,8 +1644,6 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & BCS_parts%lab_cell(BCS_parts%inv_lab_cover(jpart)), & 'j',.true.,unit_exx_debug) ! - !print*, 'j',j, 'global_num',jb%global_num,'spe',jb%spec,'nsup', jb%nsup - ! xj = jb%xyz_cv(1) yj = jb%xyz_cv(2) zj = jb%xyz_cv(3) @@ -1842,32 +1661,12 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & if ( abs(ia%xyz_ip(3)-kg%xyz_cv(3)) < ( ia%radi + kg%radi) & .and. abs(jb%xyz_cv(3)-ld%xyz_cv(3)) < ( jb%radi + ld%radi) ) then ! - !.and. abs(ia%xyz_ip(3)-ld%xyz_cv(3)) < ( ia%radi + ld%radi) & - !.and. abs(jb%xyz_cv(3)-kg%xyz_cv(3)) < ( jb%radi + kg%radi)) then - !print*, ia%radi + kg%radi, jb%radi + ld%radi - ! call compute_eri_hoh( nsf_ia, nsf_jb, nsf_kg, nsf_ld, & ia%spec, jb%spec, kg%spec, ld%spec, & ia%xyz_ip, jb%xyz_cv, kg%xyz_cv, ld%xyz_cv,& i_nt, j_nt, k_nt, l_nt,& eri_gto ) ! - ! xyz_i_dummy = 0.0d0 - ! xyz_i_dummy(3) = 10.0d0 - ! xyz_j_dummy = 0.0d0 - ! xyz_j_dummy(3) = 2.0d0 - ! xyz_k_dummy = 0.0d0 - ! xyz_k_dummy(3) = 5.0d0 - ! xyz_l_dummy = 0.0d0 - ! xyz_l_dummy(3) = 1.0d0 - ! - !call compute_eri_hoh( nsf_ia, nsf_jb, nsf_kg, nsf_ld, & - ! ia%spec, jb%spec, kg%spec, ld%spec, & - ! xyz_i_dummy, xyz_j_dummy, xyz_k_dummy, xyz_l_dummy,& - ! i_nt, j_nt, k_nt, l_nt,& - ! eri_gto ) - !print*, eri_gto - ! else ! eri_gto = 0.0d0 @@ -1877,18 +1676,8 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! if (exx_debug) then - !if ( eri_gto > 1.0d-8 ) then - sum_eri_gto = sum_eri_gto + eri_gto - !call compute_eri_hoh( nsf_ia, nsf_jb, nsf_kg, nsf_ld, & - ! ia%spec, jb%spec, kg%spec, ld%spec, & - ! ia%xyz_ip, jb%xyz_cv, kg%xyz_cv, ld%xyz_cv,& - ! i_nt, j_nt, k_nt, l_nt,& - ! eri_gto ) - - - write(unit_eri_debug,10) count, eri_gto, & '[',ia%ip, kg%global_num,'|',ld%global_num, jb%global_num,']', & '(',nsf_ia,nsf_kg, '|',nsf_ld,nsf_jb, ')' , & @@ -1897,11 +1686,7 @@ subroutine m_kern_exx_eri_gto(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ia%xyz_ip(3), kg%xyz_cv(3), ld%xyz_cv(3), jb%xyz_cv(3), & ia%xyz_ip(1), kg%xyz_cv(1), ld%xyz_cv(1), jb%xyz_cv(1),& ia%xyz_ip(2), kg%xyz_cv(2), ld%xyz_cv(2), jb%xyz_cv(2),& - !abs(ia%xyz_ip(3)-jb%xyz_cv(3)), abs(kg%xyz_cv(3)-ld%xyz_cv(3)), & - !abs(ia%xyz_ip(3)-kg%xyz_cv(3)), abs(ia%xyz_ip(3)-ld%xyz_cv(3)), & sum_eri_gto - !zi, zk, zl, zj, ai, ak, al, aj - !end if ! end if ! @@ -1988,8 +1773,8 @@ end subroutine m_kern_exx_eri_gto !! SOURCE !! subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & - ibpart, ibseq, bndim2, b, c, ahalo, chalo, & - at, mx_absb, mx_part, mx_iprim, lenb, lenc, & + ibpart, ibseq, b, c, ahalo, chalo, & + at, mx_absb, mx_part, lenb, lenc, & count_eris, compute_exx, filter_eris) use global_module, only: iprint_exx @@ -2002,7 +1787,7 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & use basic_types, only: primary_set use primary_module, only: bundle use matrix_data, only: Hrange, SXrange, Xrange, Srange - use mult_module, only: matK, return_matrix_value, S_X_SX + use mult_module, only: return_matrix_value, S_X_SX use cover_module, only: BCS_parts ! use species_module, only: nsf_species @@ -2011,13 +1796,11 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! use exx_evalpao, only: exx_phi_on_grid use exx_types, only: reckernel_3d_filter - use exx_types, only: prim_atomic_data, neigh_atomic_data, & - grid_spacing, r_int, unit_eri_debug, unit_exx_debug, & - pulay_radius, p_omega, p_ngauss, p_gauss, w_gauss, & - exx_psolver, exx_pscheme - ! - use exx_types, only: eris, exx_filter_thr, exx_filter_extent - use exx_types, only: unit_eri_filter_debug + use exx_types, only: prim_atomic_data,neigh_atomic_data, & + grid_spacing,r_int,unit_exx_debug,pulay_radius,eris, & + p_omega,p_ngauss,p_gauss,w_gauss,exx_psolver, & + exx_pscheme,exx_filter_thr,exx_filter_extent, & + unit_eri_filter_debug use exx_module, only: get_halodat, get_iprimdat ! @@ -2028,7 +1811,7 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! Passed variables type(matrix_halo), intent(in) :: ahalo, chalo type(matrix_trans), intent(in) :: at - integer, intent(in) :: mx_absb, mx_part, mx_iprim, lenb, lenc + integer, intent(in) :: mx_absb, mx_part, lenb, lenc integer, intent(in) :: kpart, k_off real(double), intent(in) :: b(lenb) real(double), intent(inout) :: c(lenc) @@ -2042,7 +1825,6 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer(integ), intent(in) :: nbnab(mx_part) integer(integ), intent(in) :: ibpart(mx_part*mx_absb) integer(integ), intent(in) :: ibseq(mx_part*mx_absb) - integer(integ), intent(in) :: bndim2(mx_part*mx_absb) ! ! Local variables integer :: jbnab2ch(mx_absb) ! Automatic array @@ -2051,7 +1833,6 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & integer :: nb_nd_kbeg integer :: nd1, nd3 integer :: nbaddr, ncaddr - integer :: lbnab2ch(mx_absb) ! Automatic array integer :: l, lseq, lpart integer :: np, ni ! @@ -2071,7 +1852,6 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! real(double), dimension(:,:,:,:), allocatable :: phi_i_filter, phi_j_filter real(double), dimension(:,:,:,:), allocatable :: phi_k_filter, phi_l_filter - real(double), dimension(:,:,:,:,:), allocatable :: rho_ki_filter, vhf_lj_filter real(double), dimension(:,:,:), allocatable :: work_in, work_out ! dr = grid_spacing @@ -2124,21 +1904,16 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call get_halodat(kg,kg,k_in_part,ahalo%i_hbeg(ahalo%lab_hcover(kpart)), & ahalo%lab_hcell(kpart),'k',.true.,unit_exx_debug) ! - !print*, 'k',k, 'global_num',kg%global_num,'spe',kg%spec - ! if ( filter_eris ) then call exx_phi_on_grid(inode,kg%global_num,kg%spec,exx_filter_extent, & xyz_zero,maxsuppfuncs,phi_k_filter,r_int,xyz_zero) - !print*, maxval(abs(phi_k_filter)) end if jbnab2ch = 0 - !print*, 'nbnab: ',nbnab(k_in_part),k_in_part do j = 1, nbnab(k_in_part) jpart = ibpart(nbkbeg+j-1) + k_off jseq = ibseq (nbkbeg+j-1) jbnab2ch(j) = chalo%i_halo(chalo%i_hbeg(jpart)+jseq-1) - !print*, 'jbnab2ch',j, jbnab2ch(j) end do ! nbbeg = nb_nd_kbeg @@ -2147,15 +1922,12 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !!$ ****[ l do loop ]**** !!$ l_loop: do l = 1, nbnab(k_in_part) - !l_in_halo = lbnab2ch(l) lpart = ibpart(nbkbeg+l-1) + k_off lseq = ibseq (nbkbeg+l-1) call get_halodat(ld,kg,lseq,chalo%i_hbeg(lpart), & BCS_parts%lab_cell(BCS_parts%inv_lab_cover(lpart)), & 'l',.true.,unit_exx_debug) ! - !write(*,*) 'l',l, 'global_num',ld%global_num,'spe',ld%spec - ! if ( filter_eris ) then ! call exx_phi_on_grid(inode,ld%global_num,ld%spec,exx_filter_extent, & @@ -2163,7 +1935,6 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! end if ! - ! count_ld = 0 ld_loop: do nsf_ld = 1, ld%nsup ! nbaddr = nbbeg + kg%nsup * (nsf_ld - 1) @@ -2181,12 +1952,9 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ni = bundle%iprim_seq (i_in_prim) np = bundle%iprim_part(i_in_prim) icad = (i_in_prim - 1) * chalo%ni_in_halo !*** - !nbbeg = nb_nd_kbeg ! call get_iprimdat(ia,kg,ni,i_in_prim,np,.true.,unit_exx_debug) ! - !print*, 'i',i, 'global_num',ia%ip,'spe',ia%spec - ! if ( filter_eris ) then call exx_phi_on_grid(inode,ia%ip,ia%spec,exx_filter_extent, & ia%xyz,maxsuppfuncs,phi_i_filter,r_int,xyz_zero) @@ -2196,11 +1964,8 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !!$ ****[ j loop ]**** !!$ j_loop: do j = 1, nbnab(k_in_part)!mat(np,Xrange)%n_nab(ni) - !nbbeg = nb_nd_kbeg j_in_halo = jbnab2ch(j) !*** ! - !print*, j, icad, j_in_halo - ! if ( j_in_halo /= 0 ) then ! ncbeg = chalo%i_h2d(icad + j_in_halo) !*** @@ -2238,7 +2003,6 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! c(ncaddr + nsf_ia - 1) = c(ncaddr + nsf_ia - 1) + & eris(kpart)%store_eris(count_eris + 1) * K_val - !write(200,*) count_eris, eris(kpart)%store_eris(count_eris + 1), K_val ! else ! @@ -2277,8 +2041,6 @@ subroutine m_kern_exx_dummy(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! end if ! - !c(ncaddr + nsf_ia - 1) = c(ncaddr + nsf_ia - 1) + 0.0d0 - ! end if ! count_eris = count_eris + 1 @@ -2371,7 +2133,7 @@ subroutine plot1d_obj(obj_on_grid,extent,r_int,unit,filename) ! << Local variables >> real(double) :: h - integer :: i, j, k, ngrid + integer :: i, ngrid ngrid = 2*extent+1