Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only : space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
49 use mxm_wrapper, only : mxm
50 use device
53 use comm, only : neko_comm
55 use mpi_f08, only : mpi_allreduce, mpi_integer, mpi_sum
56 use, intrinsic :: iso_fortran_env
57 use, intrinsic :: iso_c_binding
58 implicit none
59 private
60
63 type, public :: coef_t
65 real(kind=rp), allocatable :: g11(:,:,:,:)
67 real(kind=rp), allocatable :: g22(:,:,:,:)
69 real(kind=rp), allocatable :: g33(:,:,:,:)
71 real(kind=rp), allocatable :: g12(:,:,:,:)
73 real(kind=rp), allocatable :: g13(:,:,:,:)
75 real(kind=rp), allocatable :: g23(:,:,:,:)
76
77 real(kind=rp), allocatable :: mult(:,:,:,:)
82 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
83 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
84 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
88 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
89 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
90 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
91
92 real(kind=rp), allocatable :: h1(:,:,:,:)
93 real(kind=rp), allocatable :: h2(:,:,:,:)
94 logical :: ifh2
95
96 real(kind=rp), allocatable :: jac(:,:,:,:)
97 real(kind=rp), allocatable :: jacinv(:,:,:,:)
98 real(kind=rp), allocatable :: b(:,:,:,:)
99 real(kind=rp), allocatable :: binv(:,:,:,:)
100 real(kind=rp), pointer :: blag(:,:,:,:) => null()
101 real(kind=rp), pointer :: blaglag(:,:,:,:) => null()
102 real(kind=rp), allocatable :: area(:,:,:,:)
103 real(kind=rp), allocatable :: nx(:,:,:,:)
104 real(kind=rp), allocatable :: ny(:,:,:,:)
105 real(kind=rp), allocatable :: nz(:,:,:,:)
106 logical :: cyclic = .false.
107 integer, allocatable :: cyc_msk(:)
108 real(kind=rp), allocatable :: r11(:)
109 real(kind=rp), allocatable :: r12(:)
110
111 !! True if geometric metrics have been initialized
112 logical, private :: coef_metrics_initialized = .false.
113
115
116 real(kind=rp) :: volume
117
118 type(space_t), pointer :: xh => null()
119 type(mesh_t), pointer :: msh => null()
120 type(dofmap_t), pointer :: dof => null()
121 type(gs_t), pointer :: gs_h=> null()
122
123 !
124 ! Device pointers (if present)
125 !
126
127 type(c_ptr) :: g11_d = c_null_ptr
128 type(c_ptr) :: g22_d = c_null_ptr
129 type(c_ptr) :: g33_d = c_null_ptr
130 type(c_ptr) :: g12_d = c_null_ptr
131 type(c_ptr) :: g13_d = c_null_ptr
132 type(c_ptr) :: g23_d = c_null_ptr
133 type(c_ptr) :: dxdr_d = c_null_ptr
134 type(c_ptr) :: dydr_d = c_null_ptr
135 type(c_ptr) :: dzdr_d = c_null_ptr
136 type(c_ptr) :: dxds_d = c_null_ptr
137 type(c_ptr) :: dyds_d = c_null_ptr
138 type(c_ptr) :: dzds_d = c_null_ptr
139 type(c_ptr) :: dxdt_d = c_null_ptr
140 type(c_ptr) :: dydt_d = c_null_ptr
141 type(c_ptr) :: dzdt_d = c_null_ptr
142 type(c_ptr) :: drdx_d = c_null_ptr
143 type(c_ptr) :: drdy_d = c_null_ptr
144 type(c_ptr) :: drdz_d = c_null_ptr
145 type(c_ptr) :: dsdx_d = c_null_ptr
146 type(c_ptr) :: dsdy_d = c_null_ptr
147 type(c_ptr) :: dsdz_d = c_null_ptr
148 type(c_ptr) :: dtdx_d = c_null_ptr
149 type(c_ptr) :: dtdy_d = c_null_ptr
150 type(c_ptr) :: dtdz_d = c_null_ptr
151 type(c_ptr) :: mult_d = c_null_ptr
152 type(c_ptr) :: h1_d = c_null_ptr
153 type(c_ptr) :: h2_d = c_null_ptr
154 type(c_ptr) :: jac_d = c_null_ptr
155 type(c_ptr) :: jacinv_d = c_null_ptr
156 type(c_ptr) :: b_d = c_null_ptr
157 type(c_ptr) :: blag_d = c_null_ptr
158 type(c_ptr) :: blaglag_d = c_null_ptr
159 type(c_ptr) :: binv_d = c_null_ptr
160 type(c_ptr) :: area_d = c_null_ptr
161 type(c_ptr) :: nx_d = c_null_ptr
162 type(c_ptr) :: ny_d = c_null_ptr
163 type(c_ptr) :: nz_d = c_null_ptr
164 type(c_ptr) :: cyc_msk_d = c_null_ptr
165 type(c_ptr) :: r11_d = c_null_ptr
166 type(c_ptr) :: r12_d = c_null_ptr
167
168
169
170 contains
171 procedure, private, pass(this) :: init_empty => coef_init_empty
172 procedure, private, pass(this) :: init_all => coef_init_all
173 procedure, pass(this) :: free => coef_free
174 procedure, pass(this) :: get_normal => coef_get_normal
175 procedure, pass(this) :: get_area => coef_get_area
176 procedure, pass(this) :: generate_cyclic_bc => coef_generate_cyclic_bc
177 procedure, pass(this) :: recompute_metrics => coef_recompute_metrics
178 procedure, pass(this) :: enable_b_history => coef_enable_lagged_mass
179 procedure, pass(this) :: update_b_history => coef_update_lagged_mass
180 generic :: init => init_empty, init_all
181 end type coef_t
182
183contains
184
186 subroutine coef_init_empty(this, Xh, msh)
187 class(coef_t), intent(inout) :: this
188 type(space_t), intent(inout), target :: Xh
189 type(mesh_t), intent(inout), target :: msh
190 integer :: n
191 call this%free()
192 this%msh => msh
193 this%Xh => xh
194
195 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
196 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
197 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
198
199 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
200 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
201 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
202
203 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
204 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
205 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
206
207
208 !
209 ! Setup device memory (if present)
210 !
211
212 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
213 if (neko_bcknd_device .eq. 1) then
214
215 call device_map(this%drdx, this%drdx_d, n)
216 call device_map(this%drdy, this%drdy_d, n)
217 call device_map(this%drdz, this%drdz_d, n)
218
219 call device_map(this%dsdx, this%dsdx_d, n)
220 call device_map(this%dsdy, this%dsdy_d, n)
221 call device_map(this%dsdz, this%dsdz_d, n)
222
223 call device_map(this%dtdx, this%dtdx_d, n)
224 call device_map(this%dtdy, this%dtdy_d, n)
225 call device_map(this%dtdz, this%dtdz_d, n)
226
227 end if
228
229 end subroutine coef_init_empty
230
232 subroutine coef_init_all(this, gs_h)
233 class(coef_t), intent(inout), target :: this
234 type(gs_t), intent(inout), target :: gs_h
235 integer :: n, m, ncyc
236 call this%free()
237
238 this%msh => gs_h%dofmap%msh
239 this%Xh => gs_h%dofmap%Xh
240 this%dof => gs_h%dofmap
241 this%gs_h => gs_h
242
243 !
244 ! Allocate arrays for geometric data
245 !
247 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
250 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
251 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253
254 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
255 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
256 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
257
258 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
259 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261
262 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
265
266 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
267 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
268 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
269
270 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
271 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
272 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
273
274 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
275 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
276 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
277
278 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
279 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
280
281 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
282 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
283 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
284 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
285
286 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
287 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
288
289 ! We do this so in a static simulation we don't allocate extra memory
290 this%Blag => this%B
291 this%Blaglag => this%B
292
293 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
294 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
295
296 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
297
298
299 !
300 ! Setup device memory (if present)
301 !
302
303 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
304 if (neko_bcknd_device .eq. 1) then
305 call device_map(this%G11, this%G11_d, n)
306 call device_map(this%G22, this%G22_d, n)
307 call device_map(this%G33, this%G33_d, n)
308 call device_map(this%G12, this%G12_d, n)
309 call device_map(this%G13, this%G13_d, n)
310 call device_map(this%G23, this%G23_d, n)
311
312 call device_map(this%dxdr, this%dxdr_d, n)
313 call device_map(this%dydr, this%dydr_d, n)
314 call device_map(this%dzdr, this%dzdr_d, n)
315
316 call device_map(this%dxds, this%dxds_d, n)
317 call device_map(this%dyds, this%dyds_d, n)
318 call device_map(this%dzds, this%dzds_d, n)
319
320 call device_map(this%dxdt, this%dxdt_d, n)
321 call device_map(this%dydt, this%dydt_d, n)
322 call device_map(this%dzdt, this%dzdt_d, n)
323
324 call device_map(this%drdx, this%drdx_d, n)
325 call device_map(this%drdy, this%drdy_d, n)
326 call device_map(this%drdz, this%drdz_d, n)
327
328 call device_map(this%dsdx, this%dsdx_d, n)
329 call device_map(this%dsdy, this%dsdy_d, n)
330 call device_map(this%dsdz, this%dsdz_d, n)
331
332 call device_map(this%dtdx, this%dtdx_d, n)
333 call device_map(this%dtdy, this%dtdy_d, n)
334 call device_map(this%dtdz, this%dtdz_d, n)
335
336 call device_map(this%mult, this%mult_d, n)
337 call device_map(this%h1, this%h1_d, n)
338 call device_map(this%h2, this%h2_d, n)
339
340 call device_map(this%jac, this%jac_d, n)
341 call device_map(this%jacinv, this%jacinv_d, n)
342 call device_map(this%B, this%B_d, n)
343 call device_map(this%Binv, this%Binv_d, n)
344
345 this%Blag_d = this%B_d
346 this%Blaglag_d = this%B_d
347
348 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
349
350 call device_map(this%area, this%area_d, m)
351 call device_map(this%nx, this%nx_d, m)
352 call device_map(this%ny, this%ny_d, m)
353 call device_map(this%nz, this%nz_d, m)
354
355 end if
356
357 call coef_generate_dxyzdrst(this)
358
359 call coef_generate_geo(this)
360
362
363 call coef_generate_mass(this)
364
365 this%coef_metrics_initialized = .true.
366
367
368 ! This is a placeholder, just for now
369 ! We can probably find a prettier solution
370 if (neko_bcknd_device .eq. 1) then
371 call device_rone(this%h1_d, n)
372 call device_rone(this%h2_d, n)
373 call device_memcpy(this%h1, this%h1_d, n, &
374 device_to_host, sync = .false.)
375 call device_memcpy(this%h2, this%h2_d, n, &
376 device_to_host, sync = .false.)
377 else
378 call rone(this%h1,n)
379 call rone(this%h2,n)
380 end if
381
382 this%ifh2 = .false.
383
384 !
385 ! Set up multiplicity
386 !
387 if (neko_bcknd_device .eq. 1) then
388 call device_rone(this%mult_d, n)
389 else
390 call rone(this%mult, n)
391 end if
392
393 call gs_h%op(this%mult, n, gs_op_add)
394
395 if (neko_bcknd_device .eq. 1) then
396 call device_invcol1(this%mult_d, n)
397 call device_memcpy(this%mult, this%mult_d, n, &
398 device_to_host, sync = .true.)
399 else
400 call invcol1(this%mult, n)
401 end if
402
403 ncyc = this%msh%periodic%size * this%Xh%lx * this%Xh%lx
404 allocate(this%cyc_msk(0:ncyc))
405 this%cyc_msk(0) = ncyc + 1
406 if (ncyc .gt. 0) then
407 allocate(this%R11(ncyc))
408 allocate(this%R12(ncyc))
409
411 call rone(this%R11, ncyc)
412 call rzero(this%R12, ncyc)
413
414 if (neko_bcknd_device .eq. 1) then
415 call device_map(this%cyc_msk, this%cyc_msk_d, ncyc+1)
416 call device_map(this%R11, this%R11_d, ncyc)
417 call device_map(this%R12, this%R12_d, ncyc)
418
419 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, &
420 host_to_device, sync = .false.)
421 call device_memcpy(this%R11, this%R11_d, ncyc, &
422 host_to_device, sync = .false.)
423 call device_memcpy(this%R12, this%R12_d, ncyc, &
424 host_to_device, sync = .false.)
425 end if
426
427 end if
428 end subroutine coef_init_all
429
431 subroutine coef_free(this)
432 class(coef_t), intent(inout), target :: this
433
434 if (allocated(this%G11)) then
435 if (neko_bcknd_device .eq. 1) call device_unmap(this%G11, this%G11_d)
436 deallocate(this%G11)
437 end if
438
439 if (allocated(this%G22)) then
440 if (neko_bcknd_device .eq. 1) call device_unmap(this%G22, this%G22_d)
441 deallocate(this%G22)
442 end if
443
444 if (allocated(this%G33)) then
445 if (neko_bcknd_device .eq. 1) call device_unmap(this%G33, this%G33_d)
446 deallocate(this%G33)
447 end if
448
449 if (allocated(this%G12)) then
450 if (neko_bcknd_device .eq. 1) call device_unmap(this%G12, this%G12_d)
451 deallocate(this%G12)
452 end if
453
454 if (allocated(this%G13)) then
455 if (neko_bcknd_device .eq. 1) call device_unmap(this%G13, this%G13_d)
456 deallocate(this%G13)
457 end if
458
459 if (allocated(this%G23)) then
460 if (neko_bcknd_device .eq. 1) call device_unmap(this%G23, this%G23_d)
461 deallocate(this%G23)
462 end if
463
464 if (allocated(this%mult)) then
465 if (neko_bcknd_device .eq. 1) call device_unmap(this%mult, this%mult_d)
466 deallocate(this%mult)
467 end if
468
469 if (associated(this%Blag) .and. &
470 .not. associated(this%Blag, this%B)) then
471 if (c_associated(this%Blag_d) .and. &
472 .not. c_associated(this%Blag_d, this%B_d)) then
473 call device_unmap(this%Blag, this%Blag_d)
474 end if
475 deallocate(this%Blag)
476 end if
477 nullify(this%Blag)
478
479 if (associated(this%Blaglag) .and. &
480 .not. associated(this%Blaglag, this%B)) then
481 if (c_associated(this%Blaglag_d) .and. &
482 .not. c_associated(this%Blaglag_d, this%B_d)) then
483 call device_unmap(this%Blaglag, this%Blaglag_d)
484 end if
485 deallocate(this%Blaglag)
486 end if
487 nullify(this%Blaglag)
488
489 if (c_associated(this%Blag_d) .and. &
490 .not. c_associated(this%Blag_d, this%B_d)) then
491 this%Blag_d = c_null_ptr
492 end if
493 this%Blag_d = c_null_ptr
494
495 if (c_associated(this%Blaglag_d) .and. &
496 .not. c_associated(this%Blaglag_d, this%B_d)) then
497 this%Blaglag_d = c_null_ptr
498 end if
499 this%Blaglag_d = c_null_ptr
500
501 if (allocated(this%B)) then
502 if (neko_bcknd_device .eq. 1) call device_unmap(this%B, this%B_d)
503 deallocate(this%B)
504 end if
505
506 if (allocated(this%Binv)) then
507 if (neko_bcknd_device .eq. 1) call device_unmap(this%Binv, this%Binv_d)
508 deallocate(this%Binv)
509 end if
510
511 if (allocated(this%dxdr)) then
512 if (neko_bcknd_device .eq. 1) call device_unmap(this%dxdr, this%dxdr_d)
513 deallocate(this%dxdr)
514 end if
515
516 if (allocated(this%dxds)) then
517 if (neko_bcknd_device .eq. 1) call device_unmap(this%dxds, this%dxds_d)
518 deallocate(this%dxds)
519 end if
520
521 if (allocated(this%dxdt)) then
522 if (neko_bcknd_device .eq. 1) call device_unmap(this%dxdt, this%dxdt_d)
523 deallocate(this%dxdt)
524 end if
525
526 if (allocated(this%dydr)) then
527 if (neko_bcknd_device .eq. 1) call device_unmap(this%dydr, this%dydr_d)
528 deallocate(this%dydr)
529 end if
530
531 if (allocated(this%dyds)) then
532 if (neko_bcknd_device .eq. 1) call device_unmap(this%dyds, this%dyds_d)
533 deallocate(this%dyds)
534 end if
535
536 if (allocated(this%dydt)) then
537 if (neko_bcknd_device .eq. 1) call device_unmap(this%dydt, this%dydt_d)
538 deallocate(this%dydt)
539 end if
540
541 if (allocated(this%dzdr)) then
542 if (neko_bcknd_device .eq. 1) call device_unmap(this%dzdr, this%dzdr_d)
543 deallocate(this%dzdr)
544 end if
545
546 if (allocated(this%dzds)) then
547 if (neko_bcknd_device .eq. 1) call device_unmap(this%dzds, this%dzds_d)
548 deallocate(this%dzds)
549 end if
550
551 if (allocated(this%dzdt)) then
552 if (neko_bcknd_device .eq. 1) call device_unmap(this%dzdt, this%dzdt_d)
553 deallocate(this%dzdt)
554 end if
555
556 if (allocated(this%drdx)) then
557 if (neko_bcknd_device .eq. 1) call device_unmap(this%drdx, this%drdx_d)
558 deallocate(this%drdx)
559 end if
560
561 if (allocated(this%dsdx)) then
562 if (neko_bcknd_device .eq. 1) call device_unmap(this%dsdx, this%dsdx_d)
563 deallocate(this%dsdx)
564 end if
565
566 if (allocated(this%dtdx)) then
567 if (neko_bcknd_device .eq. 1) call device_unmap(this%dtdx, this%dtdx_d)
568 deallocate(this%dtdx)
569 end if
570
571 if (allocated(this%drdy)) then
572 if (neko_bcknd_device .eq. 1) call device_unmap(this%drdy, this%drdy_d)
573 deallocate(this%drdy)
574 end if
575
576 if (allocated(this%dsdy)) then
577 if (neko_bcknd_device .eq. 1) call device_unmap(this%dsdy, this%dsdy_d)
578 deallocate(this%dsdy)
579 end if
580
581 if (allocated(this%dtdy)) then
582 if (neko_bcknd_device .eq. 1) call device_unmap(this%dtdy, this%dtdy_d)
583 deallocate(this%dtdy)
584 end if
585
586 if (allocated(this%drdz)) then
587 if (neko_bcknd_device .eq. 1) call device_unmap(this%drdz, this%drdz_d)
588 deallocate(this%drdz)
589 end if
590
591 if (allocated(this%dsdz)) then
592 if (neko_bcknd_device .eq. 1) call device_unmap(this%dsdz, this%dsdz_d)
593 deallocate(this%dsdz)
594 end if
595
596 if (allocated(this%dtdz)) then
597 if (neko_bcknd_device .eq. 1) call device_unmap(this%dtdz, this%dtdz_d)
598 deallocate(this%dtdz)
599 end if
600
601 if (allocated(this%jac)) then
602 if (neko_bcknd_device .eq. 1) call device_unmap(this%jac, this%jac_d)
603 deallocate(this%jac)
604 end if
605
606 if (allocated(this%jacinv)) then
607 if (neko_bcknd_device .eq. 1) then
608 call device_unmap(this%jacinv, this%jacinv_d)
609 end if
610 deallocate(this%jacinv)
611 end if
612
613 if (allocated(this%h1)) then
614 if (neko_bcknd_device .eq. 1) call device_unmap(this%h1, this%h1_d)
615 deallocate(this%h1)
616 end if
617
618 if (allocated(this%h2)) then
619 if (neko_bcknd_device .eq. 1) call device_unmap(this%h2, this%h2_d)
620 deallocate(this%h2)
621 end if
622
623 if (allocated(this%area)) then
624 if (neko_bcknd_device .eq. 1) call device_unmap(this%area, this%area_d)
625 deallocate(this%area)
626 end if
627
628 if (allocated(this%nx)) then
629 if (neko_bcknd_device .eq. 1) call device_unmap(this%nx, this%nx_d)
630 deallocate(this%nx)
631 end if
632
633 if (allocated(this%ny)) then
634 if (neko_bcknd_device .eq. 1) call device_unmap(this%ny, this%ny_d)
635 deallocate(this%ny)
636 end if
637
638 if (allocated(this%nz)) then
639 if (neko_bcknd_device .eq. 1) call device_unmap(this%nz, this%nz_d)
640 deallocate(this%nz)
641 end if
642
643 if (allocated(this%cyc_msk)) then
644 if (neko_bcknd_device .eq. 1) then
645 call device_unmap(this%cyc_msk, this%cyc_msk_d)
646 end if
647 deallocate(this%cyc_msk)
648 end if
649
650 if (allocated(this%R11)) then
651 if (neko_bcknd_device .eq. 1) call device_unmap(this%R11, this%R11_d)
652 deallocate(this%R11)
653 end if
654
655 if (allocated(this%R12)) then
656 if (neko_bcknd_device .eq. 1) call device_unmap(this%R12, this%R12_d)
657 deallocate(this%R12)
658 end if
659
660
661 nullify(this%msh)
662 nullify(this%Xh)
663 nullify(this%dof)
664 nullify(this%gs_h)
665
666 end subroutine coef_free
667
669 type(coef_t), intent(inout) :: c
670 integer :: e, i, lxy, lyz, ntot
671
672 lxy = c%Xh%lx*c%Xh%ly
673 lyz = c%Xh%ly*c%Xh%lz
674 ntot = c%dof%size()
675
676 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
677 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
678 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
679 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
680 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
681 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
682 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
683 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
684 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
685 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
686 jacinv => c%jacinv, jac => c%jac)
687
688 if (neko_bcknd_device .eq. 1) then
689
690 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
691 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
692 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
693 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
694 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
695 c%Xh%lx, c%msh%nelv)
696
697 ! copy to host only at initialization.
698 if (.not. c%coef_metrics_initialized) then
699 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, &
700 sync = .false.)
701 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, &
702 sync = .false.)
703 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, &
704 sync = .false.)
705 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, &
706 sync = .false.)
707 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, &
708 sync = .false.)
709 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, &
710 sync = .false.)
711 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, &
712 sync = .false.)
713 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, &
714 sync = .false.)
715 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, &
716 sync = .false.)
717 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, &
718 sync = .false.)
719 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, &
720 sync = .false.)
721 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, &
722 sync = .false.)
723 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, &
724 sync = .false.)
725 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, &
726 sync = .false.)
727 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, &
728 sync = .false.)
729 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, &
730 sync = .false.)
731 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, &
732 sync = .false.)
733 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, &
734 sync = .false.)
735 call device_memcpy(jac, c%jac_d, ntot, device_to_host, &
736 sync = .false.)
737 call device_memcpy(jacinv, c%jacinv_d, ntot, device_to_host, &
738 sync = .true.)
739 end if
740
741 else
742 !$omp parallel do private(i)
743 do e = 1, c%msh%nelv
744 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
745 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
746 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
747
748 do i = 1, lz
749 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
750 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
751 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
752 end do
753
754 ! We actually take 2d into account, wow, need to do that for the rest.
755 if (c%msh%gdim .eq. 3) then
756 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
757 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
758 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
759 else
760 call rzero(dxdt(1,1,1,e), lxy)
761 call rzero(dydt(1,1,1,e), lxy)
762 call rone(dzdt(1,1,1,e), lxy)
763 end if
764 end do
765 !$omp end parallel do
766
767 if (c%msh%gdim .eq. 2) then
768 call rzero (jac, ntot)
769 call addcol3 (jac, dxdr, dyds, ntot)
770 call subcol3 (jac, dxds, dydr, ntot)
771 call copy (drdx, dyds, ntot)
772 call copy (drdy, dxds, ntot)
773 call chsign (drdy, ntot)
774 call copy (dsdx, dydr, ntot)
775 call chsign (dsdx, ntot)
776 call copy (dsdy, dxdr, ntot)
777 call rzero (drdz, ntot)
778 call rzero (dsdz, ntot)
779 call rone (dtdz, ntot)
780 else
781 !$omp parallel private(i)
782 !$omp do simd
783 do i = 1, ntot
784 c%jac(i, 1, 1, 1) = 0.0_rp
785 end do
786 !$omp end do simd
787 !$omp do simd
788 do i = 1, ntot
789 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
790 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
791
792 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
793 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
794
795 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
796 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
797 end do
798 !$omp end do simd
799 !$omp do simd
800 do i = 1, ntot
801 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
802 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
803
804 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
805 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
806
807 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
808 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
809 end do
810 !$omp end do simd
811 !$omp do simd
812 do i = 1, ntot
813 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
814 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
815
816 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
817 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
818
819 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
820 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
821 end do
822 !$omp end do simd
823 !$omp do simd
824 do i = 1, ntot
825 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
826 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
827
828 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
829 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
830
831 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
832 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
833 end do
834 !$omp end do simd
835 !$omp do simd
836 do i = 1, ntot
837 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
838 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
839
840 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
841 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
842
843 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
844 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
845 end do
846 !$omp end do simd
847 !$omp end parallel
848 end if
849 call invers2(jacinv, jac, ntot)
850 end if
851 end associate
852
853 end subroutine coef_generate_dxyzdrst
854
857 subroutine coef_generate_geo(c)
858 type(coef_t), intent(inout) :: c
859 integer :: e, i, lxyz, ntot
860
861 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
862 ntot = c%dof%size()
863
864 if (neko_bcknd_device .eq. 1) then
865
866 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
867 c%G22_d, c%G23_d, c%G33_d, &
868 c%drdx_d, c%drdy_d, c%drdz_d, &
869 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
870 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
871 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
872 c%Xh%lx, c%msh%gdim)
873
874 ! copy to host only at initialization.
875 if (.not. c%coef_metrics_initialized) then
876 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, &
877 sync = .false.)
878 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, &
879 sync = .false.)
880 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, &
881 sync = .false.)
882 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, &
883 sync = .false.)
884 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, &
885 sync = .false.)
886 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, &
887 sync = .true.)
888 end if
889
890 else
891 if (c%msh%gdim .eq. 2) then
892
893 do i = 1, ntot
894 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
895 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
896
897 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
898 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
899
900 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
901 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
902 end do
903
904 do i = 1, ntot
905 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
906 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
907 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
908 c%G33(i, 1, 1, 1) = 0.0_rp
909 c%G13(i, 1, 1, 1) = 0.0_rp
910 c%G23(i, 1, 1, 1) = 0.0_rp
911 end do
912
913 do concurrent(e = 1:c%msh%nelv)
914 do concurrent(i = 1:lxyz)
915 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
916 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
917 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
918 end do
919 end do
920
921 else
922 !$omp parallel private(i)
923 !$omp do
924 do i = 1, ntot
925 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
926 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
927 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
928
929 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
930 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
931 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
932
933 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
934 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
935 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
936 end do
937 !$omp end do
938 !$omp do
939 do i = 1, ntot
940 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
941 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
942 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
943 end do
944 !$omp end do
945 !$omp do
946 do i = 1, ntot
947 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
948 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
949 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
950
951 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
952 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
953 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
954
955 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
956 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
957 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
958 end do
959 !$omp end do
960 !$omp do
961 do i = 1, ntot
962 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
963 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
964 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
965 end do
966 !$omp end do
967 !$omp do
968 do e = 1, c%msh%nelv
969 do concurrent(i = 1:lxyz)
970 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
971 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
972 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
973
974 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
975 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
976 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
977 end do
978 end do
979 !$omp end do
980 !$omp end parallel
981 end if
982 end if
983
984 end subroutine coef_generate_geo
985
988 subroutine coef_generate_mass(c)
989 type(coef_t), intent(inout) :: c
990 integer :: e, i, lxyz, ntot
991
992 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
993 ntot = c%dof%size()
994
995 if (neko_bcknd_device .eq. 1) then
996 call device_coef_generate_mass(c%B_d, c%Binv_d, c%jac_d, c%Xh%w3_d, &
997 lxyz, c%msh%nelv)
998 ! copy to host only at initialization.
999 if (.not. c%coef_metrics_initialized) then
1000 call device_memcpy(c%B, c%B_d, ntot, device_to_host, sync = .false.)
1001 end if
1002 else
1003 do concurrent(e = 1:c%msh%nelv)
1004 ! Here we need to handle things differently for axis symmetric elements
1005 do concurrent(i = 1:lxyz)
1006 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
1007 c%Binv(i,1,1,e) = c%B(i,1,1,e)
1008 end do
1009 end do
1010 end if
1011
1012 call c%gs_h%op(c%Binv, ntot, gs_op_add)
1013
1014 if (neko_bcknd_device .eq. 1) then
1015 call device_invcol1(c%Binv_d, ntot)
1016 ! copy to host only at initialization.
1017 if (.not. c%coef_metrics_initialized) then
1018 call device_memcpy(c%Binv, c%Binv_d, ntot, &
1019 device_to_host, sync = .true.)
1020 end if
1021 else
1022 call invcol1(c%Binv, ntot)
1023 end if
1024
1026 if (neko_bcknd_device .eq. 1) then
1027 c%volume = device_glsum(c%B_d, ntot)
1028 else
1029 c%volume = glsum(c%B, ntot)
1030 end if
1031
1032 end subroutine coef_generate_mass
1033
1034 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
1035 class(coef_t), intent(in) :: this
1036 integer, intent(in) :: i, j, k, e, facet
1037 real(kind=rp) :: normal(3)
1038
1039 select case (facet)
1040 case (1, 2)
1041 normal(1) = this%nx(j, k, facet, e)
1042 normal(2) = this%ny(j, k, facet, e)
1043 normal(3) = this%nz(j, k, facet, e)
1044 case (3, 4)
1045 normal(1) = this%nx(i, k, facet, e)
1046 normal(2) = this%ny(i, k, facet, e)
1047 normal(3) = this%nz(i, k, facet, e)
1048 case (5, 6)
1049 normal(1) = this%nx(i, j, facet, e)
1050 normal(2) = this%ny(i, j, facet, e)
1051 normal(3) = this%nz(i, j, facet, e)
1052 end select
1053 end function coef_get_normal
1054
1055 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1056 class(coef_t), intent(in) :: this
1057 integer, intent(in) :: i, j, k, e, facet
1058 real(kind=rp) :: area
1059
1060 select case (facet)
1061 case (1, 2)
1062 area = this%area(j, k, facet, e)
1063 case (3, 4)
1064 area = this%area(i, k, facet, e)
1065 case (5, 6)
1066 area = this%area(i, j, facet, e)
1067 end select
1068 end function coef_get_area
1069
1070
1073 type(coef_t), intent(inout) :: coef
1074 real(kind=rp), allocatable :: a(:,:,:,:)
1075 real(kind=rp), allocatable :: b(:,:,:,:)
1076 real(kind=rp), allocatable :: c(:,:,:,:)
1077 real(kind=rp), allocatable :: dot(:,:,:,:)
1078 integer :: n, m, e, i, j, k, lx
1079 real(kind=rp) :: weight, len
1080 n = coef%dof%size()
1081 lx = coef%Xh%lx
1082
1083 if (neko_bcknd_device .eq. 1) then
1084
1085 call device_coef_generate_area_and_normal( &
1086 coef%area_d, coef%nx_d, coef%ny_d, coef%nz_d, &
1087 coef%dxdr_d, coef%dydr_d, coef%dzdr_d, &
1088 coef%dxds_d, coef%dyds_d, coef%dzds_d, &
1089 coef%dxdt_d, coef%dydt_d, coef%dzdt_d, &
1090 coef%Xh%wx_d, coef%Xh%wy_d, coef%Xh%wz_d, &
1091 lx, coef%msh%nelv, neko_eps)
1092
1093 ! Here, we always copy back to host.
1094 m = size(coef%area)
1095 call device_memcpy(coef%area, coef%area_d, m, &
1096 device_to_host, sync = .false.)
1097 call device_memcpy(coef%nx, coef%nx_d, m, &
1098 device_to_host, sync = .false.)
1099 call device_memcpy(coef%ny, coef%ny_d, m, &
1100 device_to_host, sync = .false.)
1101 call device_memcpy(coef%nz, coef%nz_d, &
1102 m, device_to_host, sync = .true.)
1103
1104 else
1105
1106 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1107 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1108 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1109 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1110
1111 !$omp parallel private (e, i, j, k, weight, len)
1112
1113 ! ds x dt
1114 !$omp do simd
1115 do i = 1, n
1116 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1117 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1118
1119 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1120 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1121
1122 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1123 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1124 end do
1125 !$omp end do simd
1126 !$omp do simd
1127 do i = 1, n
1128 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1129 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1130 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1131 end do
1132 !$omp end do simd
1133 !$omp do
1134 do e = 1, coef%msh%nelv
1135 do concurrent(k = 1:coef%Xh%lx)
1136 do concurrent(j = 1:coef%Xh%lx)
1137 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1138 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1139 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1140 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1141 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1142 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1143 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1144 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1145 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1146 end do
1147 end do
1148 end do
1149 !$omp end do
1150
1151 ! dr x dt
1152 !$omp do simd
1153 do i = 1, n
1154 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1155 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1156
1157 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1158 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1159
1160 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1161 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1162 end do
1163 !$omp end do simd
1164 !$omp do simd
1165 do i = 1, n
1166 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1167 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1168 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1169 end do
1170 !$omp end do simd
1171 !$omp do
1172 do e = 1, coef%msh%nelv
1173 do concurrent(k = 1:coef%Xh%lx)
1174 do concurrent(j = 1:coef%Xh%lx)
1175 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1176 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1177 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1178 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1179 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1180 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1181 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1182 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1183 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1184 end do
1185 end do
1186 end do
1187 !$omp end do
1188 ! dr x ds
1189 !$omp do simd
1190 do i = 1, n
1191 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1192 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1193
1194 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1195 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1196
1197 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1198 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1199 end do
1200 !$omp end do simd
1201 !$omp do simd
1202 do i = 1, n
1203 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1204 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1205 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1206 end do
1207 !$omp end do simd
1208 !$omp do
1209 do e = 1, coef%msh%nelv
1210 do concurrent(k = 1:coef%Xh%lx)
1211 do concurrent(j = 1:coef%Xh%lx)
1212 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1213 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1214 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1215 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1216 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1217 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1218 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1219 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1220 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1221 end do
1222 end do
1223 end do
1224 !$omp end do
1225 ! Normalize
1226 !$omp do
1227 do j = 1, size(coef%nz)
1228 len = sqrt(coef%nx(j,1,1,1)**2 + &
1229 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1230 if (len .gt. neko_eps) then
1231 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1232 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1233 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1234 end if
1235 end do
1236 !$omp end do
1237 !$omp end parallel
1238
1239 deallocate(dot)
1240 deallocate(c)
1241 deallocate(b)
1242 deallocate(a)
1243
1244 end if
1245
1246 end subroutine coef_generate_area_and_normal
1247
1249 class(coef_t), intent(inout) :: this
1250 real(kind=rp) :: un(3), len, d
1251 integer :: lx, ly, lz, np, np_glb, ierr
1252 integer :: i, j, k, pf, pe, n, nc, ncyc
1253
1254 if (.not. this%cyclic) return
1255
1256 np = this%msh%periodic%size
1257 call mpi_allreduce(np, np_glb, 1, &
1258 mpi_integer, mpi_sum, neko_comm, ierr)
1259
1260 if (np_glb .eq. 0) then
1261 call neko_error("There are no periodic boundaries. " // &
1262 "Switch cyclic off in the case file.")
1263 end if
1264
1265 if (np .eq. 0) return
1266
1267 lx = this%Xh%lx
1268 ly = this%Xh%ly
1269 lz = this%Xh%lz
1270 ncyc = this%cyc_msk(0) - 1
1271 nc = 1
1272 do n = 1, np
1273 pf = this%msh%periodic%facet_el(n)%x(1)
1274 pe = this%msh%periodic%facet_el(n)%x(2)
1275 do k = 1, lz
1276 do j = 1, ly
1277 do i = 1, lx
1278 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1279 un = this%get_normal(i, j, k, pe, pf)
1280 len = sqrt(un(1) * un(1) + un(2) * un(2))
1281 if (len .gt. neko_eps) then
1282 d = this%dof%y(i, j, k, pe) * un(1) &
1283 - this%dof%x(i, j, k, pe) * un(2)
1284
1285 this%cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1286 this%R11(nc) = un(1) / len * sign(1.0_rp, d)
1287 this%R12(nc) = un(2) / len * sign(1.0_rp, d)
1288 nc = nc + 1
1289 else
1290 call neko_error("x and y components of surface " // &
1291 "normals are zero. Cyclic rotations must be " // &
1292 "around z-axis.")
1293 end if
1294 end if
1295 end do
1296 end do
1297 end do
1298 end do
1299
1300 if (nc - 1 /= ncyc) then
1301 call neko_error("The number of cyclic GLL points were " // &
1302 "not estimated correctly.")
1303 end if
1304
1305 if (neko_bcknd_device .eq. 1) then
1306 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, &
1307 host_to_device, sync = .false.)
1308 call device_memcpy(this%R11, this%R11_d, ncyc, &
1309 host_to_device, sync = .false.)
1310 call device_memcpy(this%R12, this%R12_d, ncyc, &
1311 host_to_device, sync = .false.)
1312 end if
1313
1314 end subroutine coef_generate_cyclic_bc
1315
1316
1318 subroutine coef_recompute_metrics(this)
1319 class(coef_t), intent(inout) :: this
1320
1321 call coef_generate_dxyzdrst(this)
1322 call coef_generate_geo(this)
1324 call coef_generate_mass(this)
1325 if (this%cyclic) then
1326 call coef_generate_cyclic_bc(this)
1327 end if
1328 end subroutine coef_recompute_metrics
1329
1330
1334 class(coef_t), intent(inout), target :: this
1335 integer :: n
1336
1337 ! Return if already allocated distinctly
1338 if (.not. associated(this%Blag, this%B)) return
1339
1340 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
1341
1342
1343 nullify(this%Blag)
1344 nullify(this%Blaglag)
1345
1346 allocate(this%Blag(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
1347 allocate(this%Blaglag(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
1348
1349 this%Blag = this%B
1350 this%Blaglag = this%B
1351
1352 if (neko_bcknd_device .eq. 1) then
1353
1354 this%Blag_d = c_null_ptr
1355 this%Blaglag_d = c_null_ptr
1356
1357 call device_map(this%Blag, this%Blag_d, n)
1358 call device_map(this%Blaglag, this%Blaglag_d, n)
1359
1360 call device_memcpy(this%Blag, this%Blag_d, n, &
1361 host_to_device, sync = .false.)
1362 call device_memcpy(this%Blaglag, this%Blaglag_d, n, &
1363 host_to_device, sync = .true.)
1364 end if
1365
1366 end subroutine coef_enable_lagged_mass
1367
1368
1371 class(coef_t), intent(inout), target :: this
1372 integer :: n
1373 integer(c_size_t) :: n_bytes
1374
1375 ! If this%Blag does not have separate memory, we don't need to update it.
1376 if (associated(this%Blag, this%B)) return
1377
1378 this%Blaglag = this%Blag
1379 this%Blag = this%B
1380
1381 if (neko_bcknd_device .eq. 1) then
1382 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
1383 if (rp .eq. real32) then
1384 n_bytes = int(n, c_size_t) * 4_c_size_t
1385 else
1386 n_bytes = int(n, c_size_t) * 8_c_size_t
1387 end if
1388
1389 call device_memcpy(this%Blaglag_d, this%Blag_d, n_bytes, &
1390 device_to_device, sync = .false.)
1391
1392 call device_memcpy(this%Blag_d, this%B_d, n_bytes, &
1393 device_to_device, sync = .true.)
1394 end if
1395
1396 end subroutine coef_update_lagged_mass
1397
1398end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:78
Copy data between host and device (or device and device)
Definition device.F90:72
Unmap a Fortran array from a device (deassociate and free)
Definition device.F90:84
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:858
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:432
subroutine coef_recompute_metrics(this)
Recompute and update geometric factors (ALE)
Definition coef.f90:1319
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1056
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:1035
subroutine coef_update_lagged_mass(this)
Update history: Blaglag = Blag, Blag = B.
Definition coef.f90:1371
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:669
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1073
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:187
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:233
subroutine coef_enable_lagged_mass(this)
Enable separate memory for lagged B matrices if needed. For eg. when mesh moves.
Definition coef.f90:1334
subroutine coef_generate_cyclic_bc(this)
Definition coef.f90:1249
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:989
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:45
subroutine, public device_coef_generate_area_and_normal(area_d, nx_d, ny_d, nz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, wx_d, wy_d, wz_d, lx, nel, eps)
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_mass(b, binv, jac, w3, lxyz, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:48
integer, parameter, public device_to_host
Definition device.F90:48
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:798
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:1075
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:275
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:627
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:1162
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:769
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:724
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:289
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:70
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:233
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
pure logical function, public index_is_on_facet(i, j, k, lx, ly, lz, facet)
Definition utils.f90:314
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:289
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
The function space for the SEM solution fields.
Definition space.f90:63