49 use,
intrinsic :: iso_c_binding
57 real(kind=
rp),
allocatable :: g11(:,:,:,:)
59 real(kind=
rp),
allocatable :: g22(:,:,:,:)
61 real(kind=
rp),
allocatable :: g33(:,:,:,:)
63 real(kind=
rp),
allocatable :: g12(:,:,:,:)
65 real(kind=
rp),
allocatable :: g13(:,:,:,:)
67 real(kind=
rp),
allocatable :: g23(:,:,:,:)
69 real(kind=
rp),
allocatable :: mult(:,:,:,:)
74 real(kind=
rp),
allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
75 real(kind=
rp),
allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
76 real(kind=
rp),
allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
80 real(kind=
rp),
allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
81 real(kind=
rp),
allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
82 real(kind=
rp),
allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
84 real(kind=
rp),
allocatable :: h1(:,:,:,:)
85 real(kind=
rp),
allocatable :: h2(:,:,:,:)
88 real(kind=
rp),
allocatable :: jac(:,:,:,:)
89 real(kind=
rp),
allocatable :: jacinv(:,:,:,:)
90 real(kind=
rp),
allocatable :: b(:,:,:,:)
91 real(kind=
rp),
allocatable :: binv(:,:,:,:)
93 real(kind=
rp),
allocatable :: area(:,:,:,:)
94 real(kind=
rp),
allocatable :: nx(:,:,:,:)
95 real(kind=
rp),
allocatable :: ny(:,:,:,:)
96 real(kind=
rp),
allocatable :: nz(:,:,:,:)
99 real(kind=
rp) :: volume
104 type(
gs_t),
pointer :: gs_h=> null()
110 type(c_ptr) :: g11_d = c_null_ptr
111 type(c_ptr) :: g22_d = c_null_ptr
112 type(c_ptr) :: g33_d = c_null_ptr
113 type(c_ptr) :: g12_d = c_null_ptr
114 type(c_ptr) :: g13_d = c_null_ptr
115 type(c_ptr) :: g23_d = c_null_ptr
116 type(c_ptr) :: dxdr_d = c_null_ptr
117 type(c_ptr) :: dydr_d = c_null_ptr
118 type(c_ptr) :: dzdr_d = c_null_ptr
119 type(c_ptr) :: dxds_d = c_null_ptr
120 type(c_ptr) :: dyds_d = c_null_ptr
121 type(c_ptr) :: dzds_d = c_null_ptr
122 type(c_ptr) :: dxdt_d = c_null_ptr
123 type(c_ptr) :: dydt_d = c_null_ptr
124 type(c_ptr) :: dzdt_d = c_null_ptr
125 type(c_ptr) :: drdx_d = c_null_ptr
126 type(c_ptr) :: drdy_d = c_null_ptr
127 type(c_ptr) :: drdz_d = c_null_ptr
128 type(c_ptr) :: dsdx_d = c_null_ptr
129 type(c_ptr) :: dsdy_d = c_null_ptr
130 type(c_ptr) :: dsdz_d = c_null_ptr
131 type(c_ptr) :: dtdx_d = c_null_ptr
132 type(c_ptr) :: dtdy_d = c_null_ptr
133 type(c_ptr) :: dtdz_d = c_null_ptr
134 type(c_ptr) :: mult_d = c_null_ptr
135 type(c_ptr) :: h1_d = c_null_ptr
136 type(c_ptr) :: h2_d = c_null_ptr
137 type(c_ptr) :: jac_d = c_null_ptr
138 type(c_ptr) :: jacinv_d = c_null_ptr
139 type(c_ptr) :: b_d = c_null_ptr
140 type(c_ptr) :: binv_d = c_null_ptr
141 type(c_ptr) :: area_d = c_null_ptr
142 type(c_ptr) :: nx_d = c_null_ptr
143 type(c_ptr) :: ny_d = c_null_ptr
144 type(c_ptr) :: nz_d = c_null_ptr
153 generic :: init => init_empty, init_all
160 class(
coef_t),
intent(inout) :: this
161 type(
space_t),
intent(inout),
target :: Xh
162 type(
mesh_t),
intent(inout),
target :: msh
168 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
169 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
170 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
172 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
173 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
174 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
176 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
177 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
178 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
185 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
206 class(
coef_t),
intent(inout) :: this
207 type(
gs_t),
intent(inout),
target :: gs_h
211 this%msh => gs_h%dofmap%msh
212 this%Xh => gs_h%dofmap%Xh
213 this%dof => gs_h%dofmap
220 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
221 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
222 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
223 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
224 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
225 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
227 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
228 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
229 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
231 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
232 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
233 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
235 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
236 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
237 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
239 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
240 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
241 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
243 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
244 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
247 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
251 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
254 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
255 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
256 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
257 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
259 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
262 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
265 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
272 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
310 call device_map(this%jacinv, this%jacinv_d, n)
314 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
354 call rone(this%mult, n)
371 class(
coef_t),
intent(inout) :: this
373 if (
allocated(this%G11))
then
377 if (
allocated(this%G22))
then
381 if (
allocated(this%G33))
then
385 if (
allocated(this%G12))
then
389 if (
allocated(this%G13))
then
393 if (
allocated(this%G23))
then
397 if (
allocated(this%mult))
then
398 deallocate(this%mult)
401 if (
allocated(this%B))
then
405 if (
allocated(this%Binv))
then
406 deallocate(this%Binv)
409 if(
allocated(this%dxdr))
then
410 deallocate(this%dxdr)
413 if(
allocated(this%dxds))
then
414 deallocate(this%dxds)
417 if(
allocated(this%dxdt))
then
418 deallocate(this%dxdt)
421 if(
allocated(this%dydr))
then
422 deallocate(this%dydr)
425 if(
allocated(this%dyds))
then
426 deallocate(this%dyds)
429 if(
allocated(this%dydt))
then
430 deallocate(this%dydt)
433 if(
allocated(this%dzdr))
then
434 deallocate(this%dzdr)
437 if(
allocated(this%dzds))
then
438 deallocate(this%dzds)
441 if(
allocated(this%dzdt))
then
442 deallocate(this%dzdt)
445 if(
allocated(this%drdx))
then
446 deallocate(this%drdx)
449 if(
allocated(this%dsdx))
then
450 deallocate(this%dsdx)
453 if(
allocated(this%dtdx))
then
454 deallocate(this%dtdx)
457 if(
allocated(this%drdy))
then
458 deallocate(this%drdy)
461 if(
allocated(this%dsdy))
then
462 deallocate(this%dsdy)
465 if(
allocated(this%dtdy))
then
466 deallocate(this%dtdy)
469 if(
allocated(this%drdz))
then
470 deallocate(this%drdz)
473 if(
allocated(this%dsdz))
then
474 deallocate(this%dsdz)
477 if(
allocated(this%dtdz))
then
478 deallocate(this%dtdz)
481 if(
allocated(this%jac))
then
485 if(
allocated(this%jacinv))
then
486 deallocate(this%jacinv)
489 if(
allocated(this%h1))
then
493 if(
allocated(this%h2))
then
497 if (
allocated(this%area))
then
498 deallocate(this%area)
501 if (
allocated(this%nx))
then
505 if (
allocated(this%ny))
then
509 if (
allocated(this%nz))
then
522 if (c_associated(this%G11_d))
then
526 if (c_associated(this%G22_d))
then
530 if (c_associated(this%G33_d))
then
534 if (c_associated(this%G12_d))
then
538 if (c_associated(this%G13_d))
then
542 if (c_associated(this%G23_d))
then
546 if (c_associated(this%dxdr_d))
then
550 if (c_associated(this%dydr_d))
then
554 if (c_associated(this%dzdr_d))
then
558 if (c_associated(this%dxds_d))
then
562 if (c_associated(this%dyds_d))
then
566 if (c_associated(this%dzds_d))
then
570 if (c_associated(this%dxdt_d))
then
574 if (c_associated(this%dydt_d))
then
578 if (c_associated(this%dzdt_d))
then
582 if (c_associated(this%drdx_d))
then
586 if (c_associated(this%drdy_d))
then
590 if (c_associated(this%drdz_d))
then
594 if (c_associated(this%dsdx_d))
then
598 if (c_associated(this%dsdy_d))
then
602 if (c_associated(this%dsdz_d))
then
606 if (c_associated(this%dtdx_d))
then
610 if (c_associated(this%dtdy_d))
then
614 if (c_associated(this%dtdz_d))
then
618 if (c_associated(this%mult_d))
then
622 if (c_associated(this%h1_d))
then
626 if (c_associated(this%h2_d))
then
630 if (c_associated(this%jac_d))
then
634 if (c_associated(this%jacinv_d))
then
638 if (c_associated(this%B_d))
then
642 if (c_associated(this%Binv_d))
then
646 if (c_associated(this%area_d))
then
650 if (c_associated(this%nx_d))
then
654 if (c_associated(this%ny_d))
then
658 if (c_associated(this%nz_d))
then
666 type(
coef_t),
intent(inout) :: c
667 integer :: e,i,lxy,lyz,ntot
673 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
674 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
675 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
676 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
677 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
678 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
679 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
680 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
681 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
682 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
683 jacinv => c%jacinv, jac => c%jac)
688 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
689 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
690 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
691 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
718 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
719 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
720 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
723 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
724 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
725 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
729 if(c%msh%gdim .eq. 3)
then
730 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
731 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
732 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
734 call rzero(dxdt(1,1,1,e), lxy)
735 call rzero(dydt(1,1,1,e), lxy)
736 call rone(dzdt(1,1,1,e), lxy)
740 if (c%msh%gdim .eq. 2)
then
741 call rzero (jac, ntot)
742 call addcol3 (jac, dxdr, dyds, ntot)
743 call subcol3 (jac, dxds, dydr, ntot)
744 call copy (drdx, dyds, ntot)
745 call copy (drdy, dxds, ntot)
747 call copy (dsdx, dydr, ntot)
749 call copy (dsdy, dxdr, ntot)
750 call rzero (drdz, ntot)
751 call rzero (dsdz, ntot)
752 call rone (dtdz, ntot)
756 c%jac(i, 1, 1, 1) = 0.0_rp
760 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
761 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
763 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
764 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
766 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
767 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
771 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
772 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
774 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
775 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
777 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
778 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
782 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
783 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
785 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
786 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
788 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
789 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
793 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
794 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
796 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
797 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
799 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
800 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
804 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
805 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
807 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
808 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
810 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
811 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
815 call invers2(jacinv, jac, ntot)
824 type(
coef_t),
intent(inout) :: c
825 integer :: e, i, lxyz, ntot
827 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
830 if (neko_bcknd_device .eq. 1)
then
832 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
833 c%G22_d, c%G23_d, c%G33_d, &
834 c%drdx_d, c%drdy_d, c%drdz_d, &
835 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
836 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
837 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
840 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
841 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
842 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
843 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
844 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
845 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
848 if(c%msh%gdim .eq. 2)
then
851 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
852 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
854 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
855 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
857 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
858 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
862 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
863 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
864 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
865 c%G33(i, 1, 1, 1) = 0.0_rp
866 c%G13(i, 1, 1, 1) = 0.0_rp
867 c%G23(i, 1, 1, 1) = 0.0_rp
870 do concurrent(e = 1:c%msh%nelv)
871 do concurrent(i = 1:lxyz)
872 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
873 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
874 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
881 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
882 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
883 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
885 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
886 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
887 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
889 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
890 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
891 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
895 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
896 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
897 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
901 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
902 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
903 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
905 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
906 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
907 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
909 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
910 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
911 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
915 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
916 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
917 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
920 do concurrent(e = 1:c%msh%nelv)
921 do concurrent(i = 1:lxyz)
922 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
923 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
924 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
926 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
927 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
928 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
940 type(
coef_t),
intent(inout) :: c
941 integer :: e, i, lxyz, ntot
943 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
947 do concurrent(e = 1:c%msh%nelv)
949 do concurrent(i = 1:lxyz)
950 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
951 c%Binv(i,1,1,e) = c%B(i,1,1,e)
955 if (neko_bcknd_device .eq. 1)
then
956 call device_memcpy(c%B, c%B_d, ntot, host_to_device, sync=.false.)
957 call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, sync=.false.)
960 call c%gs_h%op(c%Binv, ntot, gs_op_add)
962 if (neko_bcknd_device .eq. 1)
then
963 call device_invcol1(c%Binv_d, ntot)
964 call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, sync=.true.)
966 call invcol1(c%Binv, ntot)
970 if (neko_bcknd_device .eq. 1)
then
971 c%volume = device_glsum(c%B_d, ntot)
973 c%volume = glsum(c%B, ntot)
979 class(
coef_t),
intent(in) :: this
980 integer,
intent(in) :: i, j, k, e, facet
981 real(kind=rp) :: normal(3)
985 normal(1) = this%nx(j, k, facet, e)
986 normal(2) = this%ny(j, k, facet, e)
987 normal(3) = this%nz(j, k, facet, e)
989 normal(1) = this%nx(i, k, facet, e)
990 normal(2) = this%ny(i, k, facet, e)
991 normal(3) = this%nz(i, k, facet, e)
993 normal(1) = this%nx(i, j, facet, e)
994 normal(2) = this%ny(i, j, facet, e)
995 normal(3) = this%nz(i, j, facet, e)
1000 class(
coef_t),
intent(in) :: this
1001 integer,
intent(in) :: i, j, k, e, facet
1002 real(kind=rp) :: area
1006 area = this%area(j, k, facet, e)
1008 area = this%area(i, k, facet, e)
1010 area = this%area(i, j, facet, e)
1017 type(
coef_t),
intent(inout) :: coef
1018 real(kind=rp),
allocatable :: a(:,:,:,:)
1019 real(kind=rp),
allocatable :: b(:,:,:,:)
1020 real(kind=rp),
allocatable :: c(:,:,:,:)
1021 real(kind=rp),
allocatable :: dot(:,:,:,:)
1022 integer :: n, e, i, j, k, lx
1023 real(kind=rp) :: weight, len
1027 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1028 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1029 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1030 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1034 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1035 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1037 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1038 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1040 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1041 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1045 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1046 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1047 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1050 do concurrent(e = 1:coef%msh%nelv)
1051 do concurrent(k = 1:coef%Xh%lx)
1052 do concurrent(j = 1:coef%Xh%lx)
1053 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1054 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1055 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1056 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1057 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1058 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1059 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1060 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1061 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1068 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1069 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1071 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1072 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1074 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1075 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1079 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1080 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1081 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1084 do concurrent(e = 1:coef%msh%nelv)
1085 do concurrent(k = 1:coef%Xh%lx)
1086 do concurrent(j = 1:coef%Xh%lx)
1087 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1088 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1089 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1090 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1091 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1092 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1093 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1094 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1095 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1102 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1103 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1105 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1106 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1108 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1109 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1113 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1114 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1115 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1118 do concurrent(e = 1:coef%msh%nelv)
1119 do concurrent(k = 1:coef%Xh%lx)
1120 do concurrent(j = 1:coef%Xh%lx)
1121 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1122 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1123 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1124 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1125 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1126 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1127 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1128 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1129 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1135 do j = 1,
size(coef%nz)
1136 len = sqrt(coef%nx(j,1,1,1)**2 + &
1137 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1138 if (len .gt. neko_eps)
then
1139 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1140 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1141 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1150 if (neko_bcknd_device .eq. 1)
then
1152 call device_memcpy(coef%area, coef%area_d, n, &
1153 host_to_device, sync=.false.)
1154 call device_memcpy(coef%nx, coef%nx_d, n, &
1155 host_to_device, sync=.false.)
1156 call device_memcpy(coef%ny, coef%ny_d, n, &
1157 host_to_device, sync=.false.)
1158 call device_memcpy(coef%nz, coef%nz_d, n, &
1159 host_to_device, sync=.false.)
Map a Fortran array to a device (allocate and associate)
Copy data between host and device (or device and device)
subroutine coef_init_empty(this, Xh, msh)
Initialize empty coefs for a space and a mesh.
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
subroutine coef_free(this)
Deallocate coefficients.
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
subroutine coef_generate_dxyzdrst(c)
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(G11_d, G12_d, G13_d, G22_d, G23_d, G33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_invcol1(a_d, n)
Invert a vector .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
Device abstraction, common interface for various accelerators.
subroutine, public device_free(x_d)
Deallocate memory on the device.
integer, parameter, public device_to_host
Defines a mapping of the degrees of freedom.
Defines Gather-scatter operations.
integer, parameter, public gs_op_add
subroutine, public invers2(a, b, n)
Compute inverted vector .
subroutine, public subcol3(a, b, c, n)
Returns .
subroutine, public rone(a, n)
Set all elements to one.
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
subroutine, public addcol3(a, b, c, n)
Returns .
subroutine, public invcol1(a, n)
Invert a vector .
subroutine, public chsign(a, n)
Change sign of vector .
subroutine, public copy(a, b, n)
Copy a vector .
real(kind=rp), parameter, public neko_eps
Machine epsilon .
subroutine, public rzero(a, n)
Zero a real vector.
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Defines a function space.
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
The function space for the SEM solution fields.