Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only : space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
49 use mxm_wrapper, only : mxm
50 use device
53 use comm, only : neko_comm
55 use mpi_f08, only : mpi_allreduce, mpi_integer, mpi_sum
56 use, intrinsic :: iso_fortran_env
57 use, intrinsic :: iso_c_binding
58 implicit none
59 private
60
63 type, public :: coef_t
65 real(kind=rp), allocatable :: g11(:,:,:,:)
67 real(kind=rp), allocatable :: g22(:,:,:,:)
69 real(kind=rp), allocatable :: g33(:,:,:,:)
71 real(kind=rp), allocatable :: g12(:,:,:,:)
73 real(kind=rp), allocatable :: g13(:,:,:,:)
75 real(kind=rp), allocatable :: g23(:,:,:,:)
76
77 real(kind=rp), allocatable :: mult(:,:,:,:)
82 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
83 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
84 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
88 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
89 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
90 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
91
92 real(kind=rp), allocatable :: h1(:,:,:,:)
93 real(kind=rp), allocatable :: h2(:,:,:,:)
94 logical :: ifh2
95
96 real(kind=rp), allocatable :: jac(:,:,:,:)
97 real(kind=rp), allocatable :: jacinv(:,:,:,:)
98 real(kind=rp), allocatable :: b(:,:,:,:)
99 real(kind=rp), allocatable :: binv(:,:,:,:)
100 real(kind=rp), pointer :: blag(:,:,:,:) => null()
101 real(kind=rp), pointer :: blaglag(:,:,:,:) => null()
102 real(kind=rp), allocatable :: area(:,:,:,:)
103 real(kind=rp), allocatable :: nx(:,:,:,:)
104 real(kind=rp), allocatable :: ny(:,:,:,:)
105 real(kind=rp), allocatable :: nz(:,:,:,:)
106 logical :: cyclic = .false.
107 integer, allocatable :: cyc_msk(:)
108 real(kind=rp), allocatable :: r11(:)
109 real(kind=rp), allocatable :: r12(:)
110
111 !! True if geometric metrics have been initialized
112 logical, private :: coef_metrics_initialized = .false.
113
115
116 real(kind=rp) :: volume
117
118 type(space_t), pointer :: xh => null()
119 type(mesh_t), pointer :: msh => null()
120 type(dofmap_t), pointer :: dof => null()
121 type(gs_t), pointer :: gs_h=> null()
122
123 !
124 ! Device pointers (if present)
125 !
126
127 type(c_ptr) :: g11_d = c_null_ptr
128 type(c_ptr) :: g22_d = c_null_ptr
129 type(c_ptr) :: g33_d = c_null_ptr
130 type(c_ptr) :: g12_d = c_null_ptr
131 type(c_ptr) :: g13_d = c_null_ptr
132 type(c_ptr) :: g23_d = c_null_ptr
133 type(c_ptr) :: dxdr_d = c_null_ptr
134 type(c_ptr) :: dydr_d = c_null_ptr
135 type(c_ptr) :: dzdr_d = c_null_ptr
136 type(c_ptr) :: dxds_d = c_null_ptr
137 type(c_ptr) :: dyds_d = c_null_ptr
138 type(c_ptr) :: dzds_d = c_null_ptr
139 type(c_ptr) :: dxdt_d = c_null_ptr
140 type(c_ptr) :: dydt_d = c_null_ptr
141 type(c_ptr) :: dzdt_d = c_null_ptr
142 type(c_ptr) :: drdx_d = c_null_ptr
143 type(c_ptr) :: drdy_d = c_null_ptr
144 type(c_ptr) :: drdz_d = c_null_ptr
145 type(c_ptr) :: dsdx_d = c_null_ptr
146 type(c_ptr) :: dsdy_d = c_null_ptr
147 type(c_ptr) :: dsdz_d = c_null_ptr
148 type(c_ptr) :: dtdx_d = c_null_ptr
149 type(c_ptr) :: dtdy_d = c_null_ptr
150 type(c_ptr) :: dtdz_d = c_null_ptr
151 type(c_ptr) :: mult_d = c_null_ptr
152 type(c_ptr) :: h1_d = c_null_ptr
153 type(c_ptr) :: h2_d = c_null_ptr
154 type(c_ptr) :: jac_d = c_null_ptr
155 type(c_ptr) :: jacinv_d = c_null_ptr
156 type(c_ptr) :: b_d = c_null_ptr
157 type(c_ptr) :: blag_d = c_null_ptr
158 type(c_ptr) :: blaglag_d = c_null_ptr
159 type(c_ptr) :: binv_d = c_null_ptr
160 type(c_ptr) :: area_d = c_null_ptr
161 type(c_ptr) :: nx_d = c_null_ptr
162 type(c_ptr) :: ny_d = c_null_ptr
163 type(c_ptr) :: nz_d = c_null_ptr
164 type(c_ptr) :: cyc_msk_d = c_null_ptr
165 type(c_ptr) :: r11_d = c_null_ptr
166 type(c_ptr) :: r12_d = c_null_ptr
167
168
169
170 contains
171 procedure, private, pass(this) :: init_empty => coef_init_empty
172 procedure, private, pass(this) :: init_all => coef_init_all
173 procedure, pass(this) :: free => coef_free
174 procedure, pass(this) :: get_normal => coef_get_normal
175 procedure, pass(this) :: get_area => coef_get_area
176 procedure, pass(this) :: generate_cyclic_bc => coef_generate_cyclic_bc
177 procedure, pass(this) :: recompute_metrics => coef_recompute_metrics
178 procedure, pass(this) :: enable_b_history => coef_enable_lagged_mass
179 procedure, pass(this) :: update_b_history => coef_update_lagged_mass
180 generic :: init => init_empty, init_all
181 end type coef_t
182
183contains
184
186 subroutine coef_init_empty(this, Xh, msh)
187 class(coef_t), intent(inout) :: this
188 type(space_t), intent(inout), target :: Xh
189 type(mesh_t), intent(inout), target :: msh
190 integer :: n
191 call this%free()
192 this%msh => msh
193 this%Xh => xh
194
195 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
196 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
197 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
198
199 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
200 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
201 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
202
203 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
204 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
205 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
206
207
208 !
209 ! Setup device memory (if present)
210 !
211
212 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
213 if (neko_bcknd_device .eq. 1) then
214
215 call device_map(this%drdx, this%drdx_d, n)
216 call device_map(this%drdy, this%drdy_d, n)
217 call device_map(this%drdz, this%drdz_d, n)
218
219 call device_map(this%dsdx, this%dsdx_d, n)
220 call device_map(this%dsdy, this%dsdy_d, n)
221 call device_map(this%dsdz, this%dsdz_d, n)
222
223 call device_map(this%dtdx, this%dtdx_d, n)
224 call device_map(this%dtdy, this%dtdy_d, n)
225 call device_map(this%dtdz, this%dtdz_d, n)
226
227 end if
228
229 end subroutine coef_init_empty
230
232 subroutine coef_init_all(this, gs_h)
233 class(coef_t), intent(inout), target :: this
234 type(gs_t), intent(inout), target :: gs_h
235 integer :: n, m, ncyc
236 call this%free()
237
238 this%msh => gs_h%dofmap%msh
239 this%Xh => gs_h%dofmap%Xh
240 this%dof => gs_h%dofmap
241 this%gs_h => gs_h
242
243 !
244 ! Allocate arrays for geometric data
245 !
247 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
250 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
251 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253
254 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
255 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
256 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
257
258 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
259 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261
262 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
265
266 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
267 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
268 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
269
270 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
271 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
272 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
273
274 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
275 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
276 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
277
278 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
279 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
280
281 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
282 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
283 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
284 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
285
286 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
287 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
288
289 ! We do this so in a static simulation we don't allocate extra memory
290 this%Blag => this%B
291 this%Blaglag => this%B
292
293 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
294 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
295
296 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
297
298
299 !
300 ! Setup device memory (if present)
301 !
302
303 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
304 if (neko_bcknd_device .eq. 1) then
305 call device_map(this%G11, this%G11_d, n)
306 call device_map(this%G22, this%G22_d, n)
307 call device_map(this%G33, this%G33_d, n)
308 call device_map(this%G12, this%G12_d, n)
309 call device_map(this%G13, this%G13_d, n)
310 call device_map(this%G23, this%G23_d, n)
311
312 call device_map(this%dxdr, this%dxdr_d, n)
313 call device_map(this%dydr, this%dydr_d, n)
314 call device_map(this%dzdr, this%dzdr_d, n)
315
316 call device_map(this%dxds, this%dxds_d, n)
317 call device_map(this%dyds, this%dyds_d, n)
318 call device_map(this%dzds, this%dzds_d, n)
319
320 call device_map(this%dxdt, this%dxdt_d, n)
321 call device_map(this%dydt, this%dydt_d, n)
322 call device_map(this%dzdt, this%dzdt_d, n)
323
324 call device_map(this%drdx, this%drdx_d, n)
325 call device_map(this%drdy, this%drdy_d, n)
326 call device_map(this%drdz, this%drdz_d, n)
327
328 call device_map(this%dsdx, this%dsdx_d, n)
329 call device_map(this%dsdy, this%dsdy_d, n)
330 call device_map(this%dsdz, this%dsdz_d, n)
331
332 call device_map(this%dtdx, this%dtdx_d, n)
333 call device_map(this%dtdy, this%dtdy_d, n)
334 call device_map(this%dtdz, this%dtdz_d, n)
335
336 call device_map(this%mult, this%mult_d, n)
337 call device_map(this%h1, this%h1_d, n)
338 call device_map(this%h2, this%h2_d, n)
339
340 call device_map(this%jac, this%jac_d, n)
341 call device_map(this%jacinv, this%jacinv_d, n)
342 call device_map(this%B, this%B_d, n)
343 call device_map(this%Binv, this%Binv_d, n)
344
345 this%Blag_d = this%B_d
346 this%Blaglag_d = this%B_d
347
348 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
349
350 call device_map(this%area, this%area_d, m)
351 call device_map(this%nx, this%nx_d, m)
352 call device_map(this%ny, this%ny_d, m)
353 call device_map(this%nz, this%nz_d, m)
354
355 end if
356
357 call coef_generate_dxyzdrst(this)
358
359 call coef_generate_geo(this)
360
362
363 call coef_generate_mass(this)
364
365 this%coef_metrics_initialized = .true.
366
367
368 ! This is a placeholder, just for now
369 ! We can probably find a prettier solution
370 if (neko_bcknd_device .eq. 1) then
371 call device_rone(this%h1_d, n)
372 call device_rone(this%h2_d, n)
373 call device_memcpy(this%h1, this%h1_d, n, &
374 device_to_host, sync = .false.)
375 call device_memcpy(this%h2, this%h2_d, n, &
376 device_to_host, sync = .false.)
377 else
378 call rone(this%h1,n)
379 call rone(this%h2,n)
380 end if
381
382 this%ifh2 = .false.
383
384 !
385 ! Set up multiplicity
386 !
387 if (neko_bcknd_device .eq. 1) then
388 call device_rone(this%mult_d, n)
389 else
390 call rone(this%mult, n)
391 end if
392
393 call gs_h%op(this%mult, n, gs_op_add)
394
395 if (neko_bcknd_device .eq. 1) then
396 call device_invcol1(this%mult_d, n)
397 call device_memcpy(this%mult, this%mult_d, n, &
398 device_to_host, sync = .true.)
399 else
400 call invcol1(this%mult, n)
401 end if
402
403 ncyc = this%msh%periodic%size * this%Xh%lx * this%Xh%lx
404 allocate(this%cyc_msk(0:ncyc))
405 this%cyc_msk(0) = ncyc + 1
406 if (ncyc .gt. 0) then
407 allocate(this%R11(ncyc))
408 allocate(this%R12(ncyc))
409
411 call rone(this%R11, ncyc)
412 call rzero(this%R12, ncyc)
413
414 if (neko_bcknd_device .eq. 1) then
415 call device_map(this%cyc_msk, this%cyc_msk_d, ncyc+1)
416 call device_map(this%R11, this%R11_d, ncyc)
417 call device_map(this%R12, this%R12_d, ncyc)
418
419 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, &
420 host_to_device, sync = .false.)
421 call device_memcpy(this%R11, this%R11_d, ncyc, &
422 host_to_device, sync = .false.)
423 call device_memcpy(this%R12, this%R12_d, ncyc, &
424 host_to_device, sync = .false.)
425 end if
426
427 end if
428 end subroutine coef_init_all
429
431 subroutine coef_free(this)
432 class(coef_t), intent(inout), target :: this
433
434 if (allocated(this%G11)) then
435 deallocate(this%G11)
436 end if
437
438 if (allocated(this%G22)) then
439 deallocate(this%G22)
440 end if
441
442 if (allocated(this%G33)) then
443 deallocate(this%G33)
444 end if
445
446 if (allocated(this%G12)) then
447 deallocate(this%G12)
448 end if
449
450 if (allocated(this%G13)) then
451 deallocate(this%G13)
452 end if
453
454 if (allocated(this%G23)) then
455 deallocate(this%G23)
456 end if
457
458 if (allocated(this%mult)) then
459 deallocate(this%mult)
460 end if
461
462 if (associated(this%Blag) .and. &
463 .not. associated(this%Blag, this%B)) then
464 deallocate(this%Blag)
465 end if
466 nullify(this%Blag)
467
468 if (associated(this%Blaglag) .and. &
469 .not. associated(this%Blaglag, this%B)) then
470 deallocate(this%Blaglag)
471 end if
472 nullify(this%Blaglag)
473
474 if (allocated(this%B)) then
475 deallocate(this%B)
476 end if
477
478 if (allocated(this%Binv)) then
479 deallocate(this%Binv)
480 end if
481
482 if (allocated(this%dxdr)) then
483 deallocate(this%dxdr)
484 end if
485
486 if (allocated(this%dxds)) then
487 deallocate(this%dxds)
488 end if
489
490 if (allocated(this%dxdt)) then
491 deallocate(this%dxdt)
492 end if
493
494 if (allocated(this%dydr)) then
495 deallocate(this%dydr)
496 end if
497
498 if (allocated(this%dyds)) then
499 deallocate(this%dyds)
500 end if
501
502 if (allocated(this%dydt)) then
503 deallocate(this%dydt)
504 end if
505
506 if (allocated(this%dzdr)) then
507 deallocate(this%dzdr)
508 end if
509
510 if (allocated(this%dzds)) then
511 deallocate(this%dzds)
512 end if
513
514 if (allocated(this%dzdt)) then
515 deallocate(this%dzdt)
516 end if
517
518 if (allocated(this%drdx)) then
519 deallocate(this%drdx)
520 end if
521
522 if (allocated(this%dsdx)) then
523 deallocate(this%dsdx)
524 end if
525
526 if (allocated(this%dtdx)) then
527 deallocate(this%dtdx)
528 end if
529
530 if (allocated(this%drdy)) then
531 deallocate(this%drdy)
532 end if
533
534 if (allocated(this%dsdy)) then
535 deallocate(this%dsdy)
536 end if
537
538 if (allocated(this%dtdy)) then
539 deallocate(this%dtdy)
540 end if
541
542 if (allocated(this%drdz)) then
543 deallocate(this%drdz)
544 end if
545
546 if (allocated(this%dsdz)) then
547 deallocate(this%dsdz)
548 end if
549
550 if (allocated(this%dtdz)) then
551 deallocate(this%dtdz)
552 end if
553
554 if (allocated(this%jac)) then
555 deallocate(this%jac)
556 end if
557
558 if (allocated(this%jacinv)) then
559 deallocate(this%jacinv)
560 end if
561
562 if (allocated(this%h1)) then
563 deallocate(this%h1)
564 end if
565
566 if (allocated(this%h2)) then
567 deallocate(this%h2)
568 end if
569
570 if (allocated(this%area)) then
571 deallocate(this%area)
572 end if
573
574 if (allocated(this%nx)) then
575 deallocate(this%nx)
576 end if
577
578 if (allocated(this%ny)) then
579 deallocate(this%ny)
580 end if
581
582 if (allocated(this%nz)) then
583 deallocate(this%nz)
584 end if
585
586 if (allocated(this%cyc_msk)) then
587 deallocate(this%cyc_msk)
588 end if
589
590 if (allocated(this%R11)) then
591 deallocate(this%R11)
592 end if
593
594 if (allocated(this%R12)) then
595 deallocate(this%R12)
596 end if
597
598
599 nullify(this%msh)
600 nullify(this%Xh)
601 nullify(this%dof)
602 nullify(this%gs_h)
603
604 !
605 ! Cleanup the device (if present)
606 !
607
608 if (c_associated(this%G11_d)) then
609 call device_free(this%G11_d)
610 end if
611
612 if (c_associated(this%G22_d)) then
613 call device_free(this%G22_d)
614 end if
615
616 if (c_associated(this%G33_d)) then
617 call device_free(this%G33_d)
618 end if
619
620 if (c_associated(this%G12_d)) then
621 call device_free(this%G12_d)
622 end if
623
624 if (c_associated(this%G13_d)) then
625 call device_free(this%G13_d)
626 end if
627
628 if (c_associated(this%G23_d)) then
629 call device_free(this%G23_d)
630 end if
631
632 if (c_associated(this%dxdr_d)) then
633 call device_free(this%dxdr_d)
634 end if
635
636 if (c_associated(this%dydr_d)) then
637 call device_free(this%dydr_d)
638 end if
639
640 if (c_associated(this%dzdr_d)) then
641 call device_free(this%dzdr_d)
642 end if
643
644 if (c_associated(this%dxds_d)) then
645 call device_free(this%dxds_d)
646 end if
647
648 if (c_associated(this%dyds_d)) then
649 call device_free(this%dyds_d)
650 end if
651
652 if (c_associated(this%dzds_d)) then
653 call device_free(this%dzds_d)
654 end if
655
656 if (c_associated(this%dxdt_d)) then
657 call device_free(this%dxdt_d)
658 end if
659
660 if (c_associated(this%dydt_d)) then
661 call device_free(this%dydt_d)
662 end if
663
664 if (c_associated(this%dzdt_d)) then
665 call device_free(this%dzdt_d)
666 end if
667
668 if (c_associated(this%drdx_d)) then
669 call device_free(this%drdx_d)
670 end if
671
672 if (c_associated(this%drdy_d)) then
673 call device_free(this%drdy_d)
674 end if
675
676 if (c_associated(this%drdz_d)) then
677 call device_free(this%drdz_d)
678 end if
679
680 if (c_associated(this%dsdx_d)) then
681 call device_free(this%dsdx_d)
682 end if
683
684 if (c_associated(this%dsdy_d)) then
685 call device_free(this%dsdy_d)
686 end if
687
688 if (c_associated(this%dsdz_d)) then
689 call device_free(this%dsdz_d)
690 end if
691
692 if (c_associated(this%dtdx_d)) then
693 call device_free(this%dtdx_d)
694 end if
695
696 if (c_associated(this%dtdy_d)) then
697 call device_free(this%dtdy_d)
698 end if
699
700 if (c_associated(this%dtdz_d)) then
701 call device_free(this%dtdz_d)
702 end if
703
704 if (c_associated(this%mult_d)) then
705 call device_free(this%mult_d)
706 end if
707
708 if (c_associated(this%h1_d)) then
709 call device_free(this%h1_d)
710 end if
711
712 if (c_associated(this%h2_d)) then
713 call device_free(this%h2_d)
714 end if
715
716 if (c_associated(this%jac_d)) then
717 call device_free(this%jac_d)
718 end if
719
720 if (c_associated(this%jacinv_d)) then
721 call device_free(this%jacinv_d)
722 end if
723
724 if (c_associated(this%Blag_d) .and. &
725 .not. c_associated(this%Blag_d, this%B_d)) then
726 call device_free(this%Blag_d)
727 end if
728 this%Blag_d = c_null_ptr
729
730 if (c_associated(this%Blaglag_d) .and. &
731 .not. c_associated(this%Blaglag_d, this%B_d)) then
732 call device_free(this%Blaglag_d)
733 end if
734 this%Blaglag_d = c_null_ptr
735
736 if (c_associated(this%B_d)) then
737 call device_free(this%B_d)
738 end if
739
740 if (c_associated(this%Binv_d)) then
741 call device_free(this%Binv_d)
742 end if
743
744 if (c_associated(this%area_d)) then
745 call device_free(this%area_d)
746 end if
747
748 if (c_associated(this%nx_d)) then
749 call device_free(this%nx_d)
750 end if
751
752 if (c_associated(this%ny_d)) then
753 call device_free(this%ny_d)
754 end if
755
756 if (c_associated(this%nz_d)) then
757 call device_free(this%nz_d)
758 end if
759
760 if (c_associated(this%cyc_msk_d)) then
761 call device_free(this%cyc_msk_d)
762 end if
763
764 if (c_associated(this%R11_d)) then
765 call device_free(this%R11_d)
766 end if
767
768 if (c_associated(this%R12_d)) then
769 call device_free(this%R12_d)
770 end if
771
772
773 end subroutine coef_free
774
776 type(coef_t), intent(inout) :: c
777 integer :: e, i, lxy, lyz, ntot
778
779 lxy = c%Xh%lx*c%Xh%ly
780 lyz = c%Xh%ly*c%Xh%lz
781 ntot = c%dof%size()
782
783 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
784 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
785 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
786 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
787 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
788 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
789 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
790 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
791 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
792 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
793 jacinv => c%jacinv, jac => c%jac)
794
795 if (neko_bcknd_device .eq. 1) then
796
797 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
798 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
799 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
800 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
801 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
802 c%Xh%lx, c%msh%nelv)
803
804 ! copy to host only at initialization.
805 if (.not. c%coef_metrics_initialized) then
806 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, &
807 sync = .false.)
808 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, &
809 sync = .false.)
810 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, &
811 sync = .false.)
812 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, &
813 sync = .false.)
814 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, &
815 sync = .false.)
816 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, &
817 sync = .false.)
818 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, &
819 sync = .false.)
820 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, &
821 sync = .false.)
822 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, &
823 sync = .false.)
824 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, &
825 sync = .false.)
826 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, &
827 sync = .false.)
828 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, &
829 sync = .false.)
830 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, &
831 sync = .false.)
832 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, &
833 sync = .false.)
834 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, &
835 sync = .false.)
836 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, &
837 sync = .false.)
838 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, &
839 sync = .false.)
840 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, &
841 sync = .false.)
842 call device_memcpy(jac, c%jac_d, ntot, device_to_host, &
843 sync = .false.)
844 call device_memcpy(jacinv, c%jacinv_d, ntot, device_to_host, &
845 sync = .true.)
846 end if
847
848 else
849 !$omp parallel do private(i)
850 do e = 1, c%msh%nelv
851 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
852 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
853 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
854
855 do i = 1, lz
856 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
857 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
858 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
859 end do
860
861 ! We actually take 2d into account, wow, need to do that for the rest.
862 if (c%msh%gdim .eq. 3) then
863 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
864 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
865 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
866 else
867 call rzero(dxdt(1,1,1,e), lxy)
868 call rzero(dydt(1,1,1,e), lxy)
869 call rone(dzdt(1,1,1,e), lxy)
870 end if
871 end do
872 !$omp end parallel do
873
874 if (c%msh%gdim .eq. 2) then
875 call rzero (jac, ntot)
876 call addcol3 (jac, dxdr, dyds, ntot)
877 call subcol3 (jac, dxds, dydr, ntot)
878 call copy (drdx, dyds, ntot)
879 call copy (drdy, dxds, ntot)
880 call chsign (drdy, ntot)
881 call copy (dsdx, dydr, ntot)
882 call chsign (dsdx, ntot)
883 call copy (dsdy, dxdr, ntot)
884 call rzero (drdz, ntot)
885 call rzero (dsdz, ntot)
886 call rone (dtdz, ntot)
887 else
888 !$omp parallel private(i)
889 !$omp do simd
890 do i = 1, ntot
891 c%jac(i, 1, 1, 1) = 0.0_rp
892 end do
893 !$omp end do simd
894 !$omp do simd
895 do i = 1, ntot
896 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
897 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
898
899 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
900 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
901
902 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
903 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
904 end do
905 !$omp end do simd
906 !$omp do simd
907 do i = 1, ntot
908 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
909 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
910
911 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
912 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
913
914 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
915 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
916 end do
917 !$omp end do simd
918 !$omp do simd
919 do i = 1, ntot
920 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
921 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
922
923 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
924 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
925
926 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
927 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
928 end do
929 !$omp end do simd
930 !$omp do simd
931 do i = 1, ntot
932 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
933 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
934
935 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
936 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
937
938 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
939 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
940 end do
941 !$omp end do simd
942 !$omp do simd
943 do i = 1, ntot
944 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
945 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
946
947 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
948 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
949
950 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
951 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
952 end do
953 !$omp end do simd
954 !$omp end parallel
955 end if
956 call invers2(jacinv, jac, ntot)
957 end if
958 end associate
959
960 end subroutine coef_generate_dxyzdrst
961
964 subroutine coef_generate_geo(c)
965 type(coef_t), intent(inout) :: c
966 integer :: e, i, lxyz, ntot
967
968 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
969 ntot = c%dof%size()
970
971 if (neko_bcknd_device .eq. 1) then
972
973 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
974 c%G22_d, c%G23_d, c%G33_d, &
975 c%drdx_d, c%drdy_d, c%drdz_d, &
976 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
977 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
978 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
979 c%Xh%lx, c%msh%gdim)
980
981 ! copy to host only at initialization.
982 if (.not. c%coef_metrics_initialized) then
983 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, &
984 sync = .false.)
985 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, &
986 sync = .false.)
987 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, &
988 sync = .false.)
989 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, &
990 sync = .false.)
991 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, &
992 sync = .false.)
993 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, &
994 sync = .true.)
995 end if
996
997 else
998 if (c%msh%gdim .eq. 2) then
999
1000 do i = 1, ntot
1001 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
1002 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
1003
1004 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
1005 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
1006
1007 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
1008 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
1009 end do
1010
1011 do i = 1, ntot
1012 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1013 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1014 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1015 c%G33(i, 1, 1, 1) = 0.0_rp
1016 c%G13(i, 1, 1, 1) = 0.0_rp
1017 c%G23(i, 1, 1, 1) = 0.0_rp
1018 end do
1019
1020 do concurrent(e = 1:c%msh%nelv)
1021 do concurrent(i = 1:lxyz)
1022 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
1023 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
1024 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
1025 end do
1026 end do
1027
1028 else
1029 !$omp parallel private(i)
1030 !$omp do
1031 do i = 1, ntot
1032 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
1033 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
1034 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
1035
1036 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
1037 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
1038 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
1039
1040 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
1041 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
1042 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
1043 end do
1044 !$omp end do
1045 !$omp do
1046 do i = 1, ntot
1047 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1048 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1049 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1050 end do
1051 !$omp end do
1052 !$omp do
1053 do i = 1, ntot
1054 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
1055 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
1056 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
1057
1058 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
1059 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
1060 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
1061
1062 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
1063 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
1064 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
1065 end do
1066 !$omp end do
1067 !$omp do
1068 do i = 1, ntot
1069 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1070 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1071 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1072 end do
1073 !$omp end do
1074 !$omp do
1075 do e = 1, c%msh%nelv
1076 do concurrent(i = 1:lxyz)
1077 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
1078 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
1079 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
1080
1081 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
1082 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
1083 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
1084 end do
1085 end do
1086 !$omp end do
1087 !$omp end parallel
1088 end if
1089 end if
1090
1091 end subroutine coef_generate_geo
1092
1096 type(coef_t), intent(inout) :: c
1097 integer :: e, i, lxyz, ntot
1098
1099 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
1100 ntot = c%dof%size()
1101
1102 if (neko_bcknd_device .eq. 1) then
1103 call device_coef_generate_mass(c%B_d, c%Binv_d, c%jac_d, c%Xh%w3_d, &
1104 lxyz, c%msh%nelv)
1105 ! copy to host only at initialization.
1106 if (.not. c%coef_metrics_initialized) then
1107 call device_memcpy(c%B, c%B_d, ntot, device_to_host, sync = .false.)
1108 end if
1109 else
1110 do concurrent(e = 1:c%msh%nelv)
1111 ! Here we need to handle things differently for axis symmetric elements
1112 do concurrent(i = 1:lxyz)
1113 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
1114 c%Binv(i,1,1,e) = c%B(i,1,1,e)
1115 end do
1116 end do
1117 end if
1118
1119 call c%gs_h%op(c%Binv, ntot, gs_op_add)
1120
1121 if (neko_bcknd_device .eq. 1) then
1122 call device_invcol1(c%Binv_d, ntot)
1123 ! copy to host only at initialization.
1124 if (.not. c%coef_metrics_initialized) then
1125 call device_memcpy(c%Binv, c%Binv_d, ntot, &
1126 device_to_host, sync = .true.)
1127 end if
1128 else
1129 call invcol1(c%Binv, ntot)
1130 end if
1131
1133 if (neko_bcknd_device .eq. 1) then
1134 c%volume = device_glsum(c%B_d, ntot)
1135 else
1136 c%volume = glsum(c%B, ntot)
1137 end if
1138
1139 end subroutine coef_generate_mass
1140
1141 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
1142 class(coef_t), intent(in) :: this
1143 integer, intent(in) :: i, j, k, e, facet
1144 real(kind=rp) :: normal(3)
1145
1146 select case (facet)
1147 case(1,2)
1148 normal(1) = this%nx(j, k, facet, e)
1149 normal(2) = this%ny(j, k, facet, e)
1150 normal(3) = this%nz(j, k, facet, e)
1151 case(3,4)
1152 normal(1) = this%nx(i, k, facet, e)
1153 normal(2) = this%ny(i, k, facet, e)
1154 normal(3) = this%nz(i, k, facet, e)
1155 case(5,6)
1156 normal(1) = this%nx(i, j, facet, e)
1157 normal(2) = this%ny(i, j, facet, e)
1158 normal(3) = this%nz(i, j, facet, e)
1159 end select
1160 end function coef_get_normal
1161
1162 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1163 class(coef_t), intent(in) :: this
1164 integer, intent(in) :: i, j, k, e, facet
1165 real(kind=rp) :: area
1166
1167 select case (facet)
1168 case(1,2)
1169 area = this%area(j, k, facet, e)
1170 case(3,4)
1171 area = this%area(i, k, facet, e)
1172 case(5,6)
1173 area = this%area(i, j, facet, e)
1174 end select
1175 end function coef_get_area
1176
1177
1180 type(coef_t), intent(inout) :: coef
1181 real(kind=rp), allocatable :: a(:,:,:,:)
1182 real(kind=rp), allocatable :: b(:,:,:,:)
1183 real(kind=rp), allocatable :: c(:,:,:,:)
1184 real(kind=rp), allocatable :: dot(:,:,:,:)
1185 integer :: n, m, e, i, j, k, lx
1186 real(kind=rp) :: weight, len
1187 n = coef%dof%size()
1188 lx = coef%Xh%lx
1189
1190 if (neko_bcknd_device .eq. 1) then
1191
1192 call device_coef_generate_area_and_normal( &
1193 coef%area_d, coef%nx_d, coef%ny_d, coef%nz_d, &
1194 coef%dxdr_d, coef%dydr_d, coef%dzdr_d, &
1195 coef%dxds_d, coef%dyds_d, coef%dzds_d, &
1196 coef%dxdt_d, coef%dydt_d, coef%dzdt_d, &
1197 coef%Xh%wx_d, coef%Xh%wy_d, coef%Xh%wz_d, &
1198 lx, coef%msh%nelv, neko_eps)
1199
1200 ! Here, we always copy back to host.
1201 m = size(coef%area)
1202 call device_memcpy(coef%area, coef%area_d, m, &
1203 device_to_host, sync = .false.)
1204 call device_memcpy(coef%nx, coef%nx_d, m, &
1205 device_to_host, sync = .false.)
1206 call device_memcpy(coef%ny, coef%ny_d, m, &
1207 device_to_host, sync = .false.)
1208 call device_memcpy(coef%nz, coef%nz_d, &
1209 m, device_to_host, sync = .true.)
1210
1211 else
1212
1213 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1214 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1215 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1216 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1217
1218 !$omp parallel private (e, i, j, k, weight, len)
1219
1220 ! ds x dt
1221 !$omp do simd
1222 do i = 1, n
1223 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1224 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1225
1226 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1227 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1228
1229 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1230 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1231 end do
1232 !$omp end do simd
1233 !$omp do simd
1234 do i = 1, n
1235 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1236 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1237 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1238 end do
1239 !$omp end do simd
1240 !$omp do
1241 do e = 1, coef%msh%nelv
1242 do concurrent(k = 1:coef%Xh%lx)
1243 do concurrent(j = 1:coef%Xh%lx)
1244 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1245 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1246 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1247 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1248 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1249 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1250 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1251 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1252 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1253 end do
1254 end do
1255 end do
1256 !$omp end do
1257
1258 ! dr x dt
1259 !$omp do simd
1260 do i = 1, n
1261 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1262 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1263
1264 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1265 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1266
1267 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1268 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1269 end do
1270 !$omp end do simd
1271 !$omp do simd
1272 do i = 1, n
1273 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1274 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1275 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1276 end do
1277 !$omp end do simd
1278 !$omp do
1279 do e = 1, coef%msh%nelv
1280 do concurrent(k = 1:coef%Xh%lx)
1281 do concurrent(j = 1:coef%Xh%lx)
1282 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1283 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1284 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1285 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1286 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1287 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1288 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1289 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1290 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1291 end do
1292 end do
1293 end do
1294 !$omp end do
1295 ! dr x ds
1296 !$omp do simd
1297 do i = 1, n
1298 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1299 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1300
1301 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1302 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1303
1304 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1305 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1306 end do
1307 !$omp end do simd
1308 !$omp do simd
1309 do i = 1, n
1310 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1311 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1312 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1313 end do
1314 !$omp end do simd
1315 !$omp do
1316 do e = 1, coef%msh%nelv
1317 do concurrent(k = 1:coef%Xh%lx)
1318 do concurrent(j = 1:coef%Xh%lx)
1319 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1320 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1321 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1322 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1323 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1324 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1325 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1326 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1327 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1328 end do
1329 end do
1330 end do
1331 !$omp end do
1332 ! Normalize
1333 !$omp do
1334 do j = 1, size(coef%nz)
1335 len = sqrt(coef%nx(j,1,1,1)**2 + &
1336 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1337 if (len .gt. neko_eps) then
1338 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1339 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1340 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1341 end if
1342 end do
1343 !$omp end do
1344 !$omp end parallel
1345
1346 deallocate(dot)
1347 deallocate(c)
1348 deallocate(b)
1349 deallocate(a)
1350
1351 end if
1352
1353 end subroutine coef_generate_area_and_normal
1354
1356 class(coef_t), intent(inout) :: this
1357 real(kind=rp) :: un(3), len, d
1358 integer :: lx, ly, lz, np, np_glb, ierr
1359 integer :: i, j, k, pf, pe, n, nc, ncyc
1360
1361 if (.not. this%cyclic) return
1362
1363 np = this%msh%periodic%size
1364 call mpi_allreduce(np, np_glb, 1, &
1365 mpi_integer, mpi_sum, neko_comm, ierr)
1366
1367 if (np_glb .eq. 0) then
1368 call neko_error("There are no periodic boundaries. " // &
1369 "Switch cyclic off in the case file.")
1370 end if
1371
1372 if (np .eq. 0) return
1373
1374 lx = this%Xh%lx
1375 ly = this%Xh%ly
1376 lz = this%Xh%lz
1377 ncyc = this%cyc_msk(0) - 1
1378 nc = 1
1379 do n = 1, np
1380 pf = this%msh%periodic%facet_el(n)%x(1)
1381 pe = this%msh%periodic%facet_el(n)%x(2)
1382 do k = 1, lz
1383 do j = 1, ly
1384 do i = 1, lx
1385 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1386 un = this%get_normal(i, j, k, pe, pf)
1387 len = sqrt(un(1) * un(1) + un(2) * un(2))
1388 if (len .gt. neko_eps) then
1389 d = this%dof%y(i, j, k, pe) * un(1) &
1390 - this%dof%x(i, j, k, pe) * un(2)
1391
1392 this%cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1393 this%R11(nc) = un(1) / len * sign(1.0_rp, d)
1394 this%R12(nc) = un(2) / len * sign(1.0_rp, d)
1395 nc = nc + 1
1396 else
1397 call neko_error("x and y components of surface " // &
1398 "normals are zero. Cyclic rotations must be " // &
1399 "around z-axis.")
1400 end if
1401 end if
1402 end do
1403 end do
1404 end do
1405 end do
1406
1407 if (nc - 1 /= ncyc) then
1408 call neko_error("The number of cyclic GLL points were " // &
1409 "not estimated correctly.")
1410 end if
1411
1412 if (neko_bcknd_device .eq. 1) then
1413 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, &
1414 host_to_device, sync = .false.)
1415 call device_memcpy(this%R11, this%R11_d, ncyc, &
1416 host_to_device, sync = .false.)
1417 call device_memcpy(this%R12, this%R12_d, ncyc, &
1418 host_to_device, sync = .false.)
1419 end if
1420
1421 end subroutine coef_generate_cyclic_bc
1422
1423
1425 subroutine coef_recompute_metrics(this)
1426 class(coef_t), intent(inout) :: this
1427
1428 call coef_generate_dxyzdrst(this)
1429 call coef_generate_geo(this)
1431 call coef_generate_mass(this)
1432 if (this%cyclic) then
1433 call coef_generate_cyclic_bc(this)
1434 end if
1435 end subroutine coef_recompute_metrics
1436
1437
1441 class(coef_t), intent(inout), target :: this
1442 integer :: n
1443
1444 ! Return if already allocated distinctly
1445 if (.not. associated(this%Blag, this%B)) return
1446
1447 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
1448
1449
1450 nullify(this%Blag)
1451 nullify(this%Blaglag)
1452
1453 allocate(this%Blag(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
1454 allocate(this%Blaglag(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
1455
1456 this%Blag = this%B
1457 this%Blaglag = this%B
1458
1459 if (neko_bcknd_device .eq. 1) then
1460
1461 this%Blag_d = c_null_ptr
1462 this%Blaglag_d = c_null_ptr
1463
1464 call device_map(this%Blag, this%Blag_d, n)
1465 call device_map(this%Blaglag, this%Blaglag_d, n)
1466
1467 call device_memcpy(this%Blag, this%Blag_d, n, &
1468 host_to_device, sync = .false.)
1469 call device_memcpy(this%Blaglag, this%Blaglag_d, n, &
1470 host_to_device, sync = .true.)
1471 end if
1472
1473 end subroutine coef_enable_lagged_mass
1474
1475
1478 class(coef_t), intent(inout), target :: this
1479 integer :: n
1480 integer(c_size_t) :: n_bytes
1481
1482 ! If this%Blag does not have separate memory, we don't need to update it.
1483 if (associated(this%Blag, this%B)) return
1484
1485 this%Blaglag = this%Blag
1486 this%Blag = this%B
1487
1488 if (neko_bcknd_device .eq. 1) then
1489 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
1490 if (rp .eq. real32) then
1491 n_bytes = int(n, c_size_t) * 4_c_size_t
1492 else
1493 n_bytes = int(n, c_size_t) * 8_c_size_t
1494 end if
1495
1496 call device_memcpy(this%Blaglag_d, this%Blag_d, n_bytes, &
1497 device_to_device, sync = .false.)
1498
1499 call device_memcpy(this%Blag_d, this%B_d, n_bytes, &
1500 device_to_device, sync = .true.)
1501 end if
1502
1503 end subroutine coef_update_lagged_mass
1504
1505end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:965
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:432
subroutine coef_recompute_metrics(this)
Recompute and update geometric factors (ALE)
Definition coef.f90:1426
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1163
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:1142
subroutine coef_update_lagged_mass(this)
Update history: Blaglag = Blag, Blag = B.
Definition coef.f90:1478
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:776
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1180
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:187
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:233
subroutine coef_enable_lagged_mass(this)
Enable separate memory for lagged B matrices if needed. For eg. when mesh moves.
Definition coef.f90:1441
subroutine coef_generate_cyclic_bc(this)
Definition coef.f90:1356
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:1096
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_coef_generate_area_and_normal(area_d, nx_d, ny_d, nz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, wx_d, wy_d, wz_d, lx, nel, eps)
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_mass(b, binv, jac, w3, lxyz, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:689
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:931
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:241
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:549
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:1008
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:664
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:629
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:252
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:70
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:208
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter neko_bcknd_opencl
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
pure logical function, public index_is_on_facet(i, j, k, lx, ly, lz, facet)
Definition utils.f90:308
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:283
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:63
The function space for the SEM solution fields.
Definition space.f90:63