Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2026, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only : space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
48 use mxm_wrapper, only : mxm
49 use device
52 use comm, only : neko_comm
54 use mpi_f08, only : mpi_allreduce, mpi_integer, mpi_sum
55 use, intrinsic :: iso_fortran_env
56 use, intrinsic :: iso_c_binding
57 implicit none
58 private
59
62 type, public :: coef_t
64 real(kind=rp), allocatable :: g11(:,:,:,:)
66 real(kind=rp), allocatable :: g22(:,:,:,:)
68 real(kind=rp), allocatable :: g33(:,:,:,:)
70 real(kind=rp), allocatable :: g12(:,:,:,:)
72 real(kind=rp), allocatable :: g13(:,:,:,:)
74 real(kind=rp), allocatable :: g23(:,:,:,:)
75
76 real(kind=rp), allocatable :: mult(:,:,:,:)
81 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
82 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
83 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
87 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
88 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
89 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
90
91 real(kind=rp), allocatable :: h1(:,:,:,:)
92 real(kind=rp), allocatable :: h2(:,:,:,:)
93 logical :: ifh2
94
95 real(kind=rp), allocatable :: jac(:,:,:,:)
96 real(kind=rp), allocatable :: jacinv(:,:,:,:)
97 real(kind=rp), allocatable :: b(:,:,:,:)
98 real(kind=rp), allocatable :: binv(:,:,:,:)
99 real(kind=rp), pointer :: blag(:,:,:,:) => null()
100 real(kind=rp), pointer :: blaglag(:,:,:,:) => null()
101 real(kind=rp), allocatable :: area(:,:,:,:)
102 real(kind=rp), allocatable :: nx(:,:,:,:)
103 real(kind=rp), allocatable :: ny(:,:,:,:)
104 real(kind=rp), allocatable :: nz(:,:,:,:)
105 logical :: cyclic = .false.
106 integer, allocatable :: cyc_msk(:)
107 real(kind=rp), allocatable :: r11(:)
108 real(kind=rp), allocatable :: r12(:)
110
111 real(kind=rp) :: volume
112
113 type(space_t), pointer :: xh => null()
114 type(mesh_t), pointer :: msh => null()
115 type(dofmap_t), pointer :: dof => null()
116 type(gs_t), pointer :: gs_h=> null()
117
118 !
119 ! Device pointers (if present)
120 !
121
122 type(c_ptr) :: g11_d = c_null_ptr
123 type(c_ptr) :: g22_d = c_null_ptr
124 type(c_ptr) :: g33_d = c_null_ptr
125 type(c_ptr) :: g12_d = c_null_ptr
126 type(c_ptr) :: g13_d = c_null_ptr
127 type(c_ptr) :: g23_d = c_null_ptr
128 type(c_ptr) :: dxdr_d = c_null_ptr
129 type(c_ptr) :: dydr_d = c_null_ptr
130 type(c_ptr) :: dzdr_d = c_null_ptr
131 type(c_ptr) :: dxds_d = c_null_ptr
132 type(c_ptr) :: dyds_d = c_null_ptr
133 type(c_ptr) :: dzds_d = c_null_ptr
134 type(c_ptr) :: dxdt_d = c_null_ptr
135 type(c_ptr) :: dydt_d = c_null_ptr
136 type(c_ptr) :: dzdt_d = c_null_ptr
137 type(c_ptr) :: drdx_d = c_null_ptr
138 type(c_ptr) :: drdy_d = c_null_ptr
139 type(c_ptr) :: drdz_d = c_null_ptr
140 type(c_ptr) :: dsdx_d = c_null_ptr
141 type(c_ptr) :: dsdy_d = c_null_ptr
142 type(c_ptr) :: dsdz_d = c_null_ptr
143 type(c_ptr) :: dtdx_d = c_null_ptr
144 type(c_ptr) :: dtdy_d = c_null_ptr
145 type(c_ptr) :: dtdz_d = c_null_ptr
146 type(c_ptr) :: mult_d = c_null_ptr
147 type(c_ptr) :: h1_d = c_null_ptr
148 type(c_ptr) :: h2_d = c_null_ptr
149 type(c_ptr) :: jac_d = c_null_ptr
150 type(c_ptr) :: jacinv_d = c_null_ptr
151 type(c_ptr) :: b_d = c_null_ptr
152 type(c_ptr) :: blag_d = c_null_ptr
153 type(c_ptr) :: blaglag_d = c_null_ptr
154 type(c_ptr) :: binv_d = c_null_ptr
155 type(c_ptr) :: area_d = c_null_ptr
156 type(c_ptr) :: nx_d = c_null_ptr
157 type(c_ptr) :: ny_d = c_null_ptr
158 type(c_ptr) :: nz_d = c_null_ptr
159 type(c_ptr) :: cyc_msk_d = c_null_ptr
160 type(c_ptr) :: r11_d = c_null_ptr
161 type(c_ptr) :: r12_d = c_null_ptr
162
163
164
165 contains
166 procedure, private, pass(this) :: init_empty => coef_init_empty
167 procedure, private, pass(this) :: init_all => coef_init_all
168 procedure, pass(this) :: free => coef_free
169 procedure, pass(this) :: get_normal => coef_get_normal
170 procedure, pass(this) :: get_area => coef_get_area
171 procedure, pass(this) :: generate_cyclic_bc => coef_generate_cyclic_bc
172 procedure, pass(this) :: recompute_metrics => coef_recompute_metrics
173 procedure, pass(this) :: enable_b_history => coef_enable_lagged_mass
174 procedure, pass(this) :: update_b_history => coef_update_lagged_mass
175 generic :: init => init_empty, init_all
176 end type coef_t
177
178contains
179
181 subroutine coef_init_empty(this, Xh, msh)
182 class(coef_t), intent(inout) :: this
183 type(space_t), intent(inout), target :: Xh
184 type(mesh_t), intent(inout), target :: msh
185 integer :: n
186 call this%free()
187 this%msh => msh
188 this%Xh => xh
189
190 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
191 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
192 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
193
194 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
195 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
196 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
197
198 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
199 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
200 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
201
202
203 !
204 ! Setup device memory (if present)
205 !
206
207 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
208 if (neko_bcknd_device .eq. 1) then
209
210 call device_map(this%drdx, this%drdx_d, n)
211 call device_map(this%drdy, this%drdy_d, n)
212 call device_map(this%drdz, this%drdz_d, n)
213
214 call device_map(this%dsdx, this%dsdx_d, n)
215 call device_map(this%dsdy, this%dsdy_d, n)
216 call device_map(this%dsdz, this%dsdz_d, n)
217
218 call device_map(this%dtdx, this%dtdx_d, n)
219 call device_map(this%dtdy, this%dtdy_d, n)
220 call device_map(this%dtdz, this%dtdz_d, n)
221
222 end if
223
224 end subroutine coef_init_empty
225
227 subroutine coef_init_all(this, gs_h)
228 class(coef_t), intent(inout), target :: this
229 type(gs_t), intent(inout), target :: gs_h
230 integer :: n, m, ncyc
231 call this%free()
232
233 this%msh => gs_h%dofmap%msh
234 this%Xh => gs_h%dofmap%Xh
235 this%dof => gs_h%dofmap
236 this%gs_h => gs_h
237
238 !
239 ! Allocate arrays for geometric data
240 !
242 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
243 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
244 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
246 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
247 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248
249 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
250 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
251 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252
253 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
254 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
255 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
256
257 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
258 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
259 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260
261 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
262 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264
265 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
266 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
267 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
268
269 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
270 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
271 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
272
273 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
274 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
275
276 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
277 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
278 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
279 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
280
281 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
282 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
283
284 ! We do this so in a static simulation we don't allocate extra memory
285 this%Blag => this%B
286 this%Blaglag => this%B
287
288 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
289 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
290
291 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
292
293
294 !
295 ! Setup device memory (if present)
296 !
297
298 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
299 if (neko_bcknd_device .eq. 1) then
300 call device_map(this%G11, this%G11_d, n)
301 call device_map(this%G22, this%G22_d, n)
302 call device_map(this%G33, this%G33_d, n)
303 call device_map(this%G12, this%G12_d, n)
304 call device_map(this%G13, this%G13_d, n)
305 call device_map(this%G23, this%G23_d, n)
306
307 call device_map(this%dxdr, this%dxdr_d, n)
308 call device_map(this%dydr, this%dydr_d, n)
309 call device_map(this%dzdr, this%dzdr_d, n)
310
311 call device_map(this%dxds, this%dxds_d, n)
312 call device_map(this%dyds, this%dyds_d, n)
313 call device_map(this%dzds, this%dzds_d, n)
314
315 call device_map(this%dxdt, this%dxdt_d, n)
316 call device_map(this%dydt, this%dydt_d, n)
317 call device_map(this%dzdt, this%dzdt_d, n)
318
319 call device_map(this%drdx, this%drdx_d, n)
320 call device_map(this%drdy, this%drdy_d, n)
321 call device_map(this%drdz, this%drdz_d, n)
322
323 call device_map(this%dsdx, this%dsdx_d, n)
324 call device_map(this%dsdy, this%dsdy_d, n)
325 call device_map(this%dsdz, this%dsdz_d, n)
326
327 call device_map(this%dtdx, this%dtdx_d, n)
328 call device_map(this%dtdy, this%dtdy_d, n)
329 call device_map(this%dtdz, this%dtdz_d, n)
330
331 call device_map(this%mult, this%mult_d, n)
332 call device_map(this%h1, this%h1_d, n)
333 call device_map(this%h2, this%h2_d, n)
334
335 call device_map(this%jac, this%jac_d, n)
336 call device_map(this%jacinv, this%jacinv_d, n)
337 call device_map(this%B, this%B_d, n)
338 call device_map(this%Binv, this%Binv_d, n)
339
340 this%Blag_d = this%B_d
341 this%Blaglag_d = this%B_d
342
343 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
344
345 call device_map(this%area, this%area_d, m)
346 call device_map(this%nx, this%nx_d, m)
347 call device_map(this%ny, this%ny_d, m)
348 call device_map(this%nz, this%nz_d, m)
349
350 end if
351
352 call coef_generate_dxyzdrst(this)
353
354 call coef_generate_geo(this)
355
357
358 call coef_generate_mass(this)
359
360
361 ! This is a placeholder, just for now
362 ! We can probably find a prettier solution
363 if (neko_bcknd_device .eq. 1) then
364 call device_rone(this%h1_d, n)
365 call device_rone(this%h2_d, n)
366 call device_memcpy(this%h1, this%h1_d, n, &
367 device_to_host, sync = .false.)
368 call device_memcpy(this%h2, this%h2_d, n, &
369 device_to_host, sync = .false.)
370 else
371 call rone(this%h1,n)
372 call rone(this%h2,n)
373 end if
374
375 this%ifh2 = .false.
376
377 !
378 ! Set up multiplicity
379 !
380 if (neko_bcknd_device .eq. 1) then
381 call device_rone(this%mult_d, n)
382 else
383 call rone(this%mult, n)
384 end if
385
386 call gs_h%op(this%mult, n, gs_op_add)
387
388 if (neko_bcknd_device .eq. 1) then
389 call device_invcol1(this%mult_d, n)
390 call device_memcpy(this%mult, this%mult_d, n, &
391 device_to_host, sync = .true.)
392 else
393 call invcol1(this%mult, n)
394 end if
395
396 ncyc = this%msh%periodic%size * this%Xh%lx * this%Xh%lx
397 allocate(this%cyc_msk(0:ncyc))
398 this%cyc_msk(0) = ncyc + 1
399 if (ncyc .gt. 0) then
400 allocate(this%R11(ncyc))
401 allocate(this%R12(ncyc))
402
404 call rone(this%R11, ncyc)
405 call rzero(this%R12, ncyc)
406
407 if (neko_bcknd_device .eq. 1) then
408 call device_map(this%cyc_msk, this%cyc_msk_d, ncyc+1)
409 call device_map(this%R11, this%R11_d, ncyc)
410 call device_map(this%R12, this%R12_d, ncyc)
411
412 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, &
413 host_to_device, sync = .false.)
414 call device_memcpy(this%R11, this%R11_d, ncyc, &
415 host_to_device, sync = .false.)
416 call device_memcpy(this%R12, this%R12_d, ncyc, &
417 host_to_device, sync = .false.)
418 end if
419
420 end if
421 end subroutine coef_init_all
422
424 subroutine coef_free(this)
425 class(coef_t), intent(inout), target :: this
426
427 if (allocated(this%G11)) then
428 deallocate(this%G11)
429 end if
430
431 if (allocated(this%G22)) then
432 deallocate(this%G22)
433 end if
434
435 if (allocated(this%G33)) then
436 deallocate(this%G33)
437 end if
438
439 if (allocated(this%G12)) then
440 deallocate(this%G12)
441 end if
442
443 if (allocated(this%G13)) then
444 deallocate(this%G13)
445 end if
446
447 if (allocated(this%G23)) then
448 deallocate(this%G23)
449 end if
450
451 if (allocated(this%mult)) then
452 deallocate(this%mult)
453 end if
454
455 if (associated(this%Blag) .and. &
456 .not. associated(this%Blag, this%B)) then
457 deallocate(this%Blag)
458 end if
459 nullify(this%Blag)
460
461 if (associated(this%Blaglag) .and. &
462 .not. associated(this%Blaglag, this%B)) then
463 deallocate(this%Blaglag)
464 end if
465 nullify(this%Blaglag)
466
467 if (allocated(this%B)) then
468 deallocate(this%B)
469 end if
470
471 if (allocated(this%Binv)) then
472 deallocate(this%Binv)
473 end if
474
475 if (allocated(this%dxdr)) then
476 deallocate(this%dxdr)
477 end if
478
479 if (allocated(this%dxds)) then
480 deallocate(this%dxds)
481 end if
482
483 if (allocated(this%dxdt)) then
484 deallocate(this%dxdt)
485 end if
486
487 if (allocated(this%dydr)) then
488 deallocate(this%dydr)
489 end if
490
491 if (allocated(this%dyds)) then
492 deallocate(this%dyds)
493 end if
494
495 if (allocated(this%dydt)) then
496 deallocate(this%dydt)
497 end if
498
499 if (allocated(this%dzdr)) then
500 deallocate(this%dzdr)
501 end if
502
503 if (allocated(this%dzds)) then
504 deallocate(this%dzds)
505 end if
506
507 if (allocated(this%dzdt)) then
508 deallocate(this%dzdt)
509 end if
510
511 if (allocated(this%drdx)) then
512 deallocate(this%drdx)
513 end if
514
515 if (allocated(this%dsdx)) then
516 deallocate(this%dsdx)
517 end if
518
519 if (allocated(this%dtdx)) then
520 deallocate(this%dtdx)
521 end if
522
523 if (allocated(this%drdy)) then
524 deallocate(this%drdy)
525 end if
526
527 if (allocated(this%dsdy)) then
528 deallocate(this%dsdy)
529 end if
530
531 if (allocated(this%dtdy)) then
532 deallocate(this%dtdy)
533 end if
534
535 if (allocated(this%drdz)) then
536 deallocate(this%drdz)
537 end if
538
539 if (allocated(this%dsdz)) then
540 deallocate(this%dsdz)
541 end if
542
543 if (allocated(this%dtdz)) then
544 deallocate(this%dtdz)
545 end if
546
547 if (allocated(this%jac)) then
548 deallocate(this%jac)
549 end if
550
551 if (allocated(this%jacinv)) then
552 deallocate(this%jacinv)
553 end if
554
555 if (allocated(this%h1)) then
556 deallocate(this%h1)
557 end if
558
559 if (allocated(this%h2)) then
560 deallocate(this%h2)
561 end if
562
563 if (allocated(this%area)) then
564 deallocate(this%area)
565 end if
566
567 if (allocated(this%nx)) then
568 deallocate(this%nx)
569 end if
570
571 if (allocated(this%ny)) then
572 deallocate(this%ny)
573 end if
574
575 if (allocated(this%nz)) then
576 deallocate(this%nz)
577 end if
578
579 if (allocated(this%cyc_msk)) then
580 deallocate(this%cyc_msk)
581 end if
582
583 if (allocated(this%R11)) then
584 deallocate(this%R11)
585 end if
586
587 if (allocated(this%R12)) then
588 deallocate(this%R12)
589 end if
590
591
592 nullify(this%msh)
593 nullify(this%Xh)
594 nullify(this%dof)
595 nullify(this%gs_h)
596
597 !
598 ! Cleanup the device (if present)
599 !
600
601 if (c_associated(this%G11_d)) then
602 call device_free(this%G11_d)
603 end if
604
605 if (c_associated(this%G22_d)) then
606 call device_free(this%G22_d)
607 end if
608
609 if (c_associated(this%G33_d)) then
610 call device_free(this%G33_d)
611 end if
612
613 if (c_associated(this%G12_d)) then
614 call device_free(this%G12_d)
615 end if
616
617 if (c_associated(this%G13_d)) then
618 call device_free(this%G13_d)
619 end if
620
621 if (c_associated(this%G23_d)) then
622 call device_free(this%G23_d)
623 end if
624
625 if (c_associated(this%dxdr_d)) then
626 call device_free(this%dxdr_d)
627 end if
628
629 if (c_associated(this%dydr_d)) then
630 call device_free(this%dydr_d)
631 end if
632
633 if (c_associated(this%dzdr_d)) then
634 call device_free(this%dzdr_d)
635 end if
636
637 if (c_associated(this%dxds_d)) then
638 call device_free(this%dxds_d)
639 end if
640
641 if (c_associated(this%dyds_d)) then
642 call device_free(this%dyds_d)
643 end if
644
645 if (c_associated(this%dzds_d)) then
646 call device_free(this%dzds_d)
647 end if
648
649 if (c_associated(this%dxdt_d)) then
650 call device_free(this%dxdt_d)
651 end if
652
653 if (c_associated(this%dydt_d)) then
654 call device_free(this%dydt_d)
655 end if
656
657 if (c_associated(this%dzdt_d)) then
658 call device_free(this%dzdt_d)
659 end if
660
661 if (c_associated(this%drdx_d)) then
662 call device_free(this%drdx_d)
663 end if
664
665 if (c_associated(this%drdy_d)) then
666 call device_free(this%drdy_d)
667 end if
668
669 if (c_associated(this%drdz_d)) then
670 call device_free(this%drdz_d)
671 end if
672
673 if (c_associated(this%dsdx_d)) then
674 call device_free(this%dsdx_d)
675 end if
676
677 if (c_associated(this%dsdy_d)) then
678 call device_free(this%dsdy_d)
679 end if
680
681 if (c_associated(this%dsdz_d)) then
682 call device_free(this%dsdz_d)
683 end if
684
685 if (c_associated(this%dtdx_d)) then
686 call device_free(this%dtdx_d)
687 end if
688
689 if (c_associated(this%dtdy_d)) then
690 call device_free(this%dtdy_d)
691 end if
692
693 if (c_associated(this%dtdz_d)) then
694 call device_free(this%dtdz_d)
695 end if
696
697 if (c_associated(this%mult_d)) then
698 call device_free(this%mult_d)
699 end if
700
701 if (c_associated(this%h1_d)) then
702 call device_free(this%h1_d)
703 end if
704
705 if (c_associated(this%h2_d)) then
706 call device_free(this%h2_d)
707 end if
708
709 if (c_associated(this%jac_d)) then
710 call device_free(this%jac_d)
711 end if
712
713 if (c_associated(this%jacinv_d)) then
714 call device_free(this%jacinv_d)
715 end if
716
717 if (c_associated(this%Blag_d) .and. &
718 .not. c_associated(this%Blag_d, this%B_d)) then
719 call device_free(this%Blag_d)
720 end if
721 this%Blag_d = c_null_ptr
722
723 if (c_associated(this%Blaglag_d) .and. &
724 .not. c_associated(this%Blaglag_d, this%B_d)) then
725 call device_free(this%Blaglag_d)
726 end if
727 this%Blaglag_d = c_null_ptr
728
729 if (c_associated(this%B_d)) then
730 call device_free(this%B_d)
731 end if
732
733 if (c_associated(this%Binv_d)) then
734 call device_free(this%Binv_d)
735 end if
736
737 if (c_associated(this%area_d)) then
738 call device_free(this%area_d)
739 end if
740
741 if (c_associated(this%nx_d)) then
742 call device_free(this%nx_d)
743 end if
744
745 if (c_associated(this%ny_d)) then
746 call device_free(this%ny_d)
747 end if
748
749 if (c_associated(this%nz_d)) then
750 call device_free(this%nz_d)
751 end if
752
753 if (c_associated(this%cyc_msk_d)) then
754 call device_free(this%cyc_msk_d)
755 end if
756
757 if (c_associated(this%R11_d)) then
758 call device_free(this%R11_d)
759 end if
760
761 if (c_associated(this%R12_d)) then
762 call device_free(this%R12_d)
763 end if
764
765
766 end subroutine coef_free
767
769 type(coef_t), intent(inout) :: c
770 integer :: e, i, lxy, lyz, ntot
771
772 lxy = c%Xh%lx*c%Xh%ly
773 lyz = c%Xh%ly*c%Xh%lz
774 ntot = c%dof%size()
775
776 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
777 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
778 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
779 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
780 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
781 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
782 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
783 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
784 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
785 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
786 jacinv => c%jacinv, jac => c%jac)
787
788 if (neko_bcknd_device .eq. 1) then
789
790 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
791 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
792 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
793 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
794 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
795 c%Xh%lx, c%msh%nelv)
796
797 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, &
798 sync = .false.)
799 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, &
800 sync = .false.)
801 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, &
802 sync = .false.)
803 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, &
804 sync = .false.)
805 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, &
806 sync = .false.)
807 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, &
808 sync = .false.)
809 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, &
810 sync = .false.)
811 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, &
812 sync = .false.)
813 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, &
814 sync = .false.)
815 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, &
816 sync = .false.)
817 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, &
818 sync = .false.)
819 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, &
820 sync = .false.)
821 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, &
822 sync = .false.)
823 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, &
824 sync = .false.)
825 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, &
826 sync = .false.)
827 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, &
828 sync = .false.)
829 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, &
830 sync = .false.)
831 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, &
832 sync = .false.)
833 call device_memcpy(jac, c%jac_d, ntot, device_to_host, &
834 sync = .false.)
835 call device_memcpy(jacinv, c%jacinv_d, ntot, device_to_host, &
836 sync = .true.)
837
838 else
839 !$omp parallel do private(i)
840 do e = 1, c%msh%nelv
841 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
842 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
843 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
844
845 do i = 1, lz
846 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
847 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
848 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
849 end do
850
851 ! We actually take 2d into account, wow, need to do that for the rest.
852 if (c%msh%gdim .eq. 3) then
853 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
854 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
855 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
856 else
857 call rzero(dxdt(1,1,1,e), lxy)
858 call rzero(dydt(1,1,1,e), lxy)
859 call rone(dzdt(1,1,1,e), lxy)
860 end if
861 end do
862 !$omp end parallel do
863
864 if (c%msh%gdim .eq. 2) then
865 call rzero (jac, ntot)
866 call addcol3 (jac, dxdr, dyds, ntot)
867 call subcol3 (jac, dxds, dydr, ntot)
868 call copy (drdx, dyds, ntot)
869 call copy (drdy, dxds, ntot)
870 call chsign (drdy, ntot)
871 call copy (dsdx, dydr, ntot)
872 call chsign (dsdx, ntot)
873 call copy (dsdy, dxdr, ntot)
874 call rzero (drdz, ntot)
875 call rzero (dsdz, ntot)
876 call rone (dtdz, ntot)
877 else
878 !$omp parallel private(i)
879 !$omp do simd
880 do i = 1, ntot
881 c%jac(i, 1, 1, 1) = 0.0_rp
882 end do
883 !$omp end do simd
884 !$omp do simd
885 do i = 1, ntot
886 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
887 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
888
889 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
890 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
891
892 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
893 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
894 end do
895 !$omp end do simd
896 !$omp do simd
897 do i = 1, ntot
898 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
899 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
900
901 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
902 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
903
904 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
905 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
906 end do
907 !$omp end do simd
908 !$omp do simd
909 do i = 1, ntot
910 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
911 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
912
913 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
914 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
915
916 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
917 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
918 end do
919 !$omp end do simd
920 !$omp do simd
921 do i = 1, ntot
922 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
923 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
924
925 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
926 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
927
928 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
929 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
930 end do
931 !$omp end do simd
932 !$omp do simd
933 do i = 1, ntot
934 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
935 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
936
937 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
938 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
939
940 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
941 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
942 end do
943 !$omp end do simd
944 !$omp end parallel
945 end if
946 call invers2(jacinv, jac, ntot)
947 end if
948 end associate
949
950 end subroutine coef_generate_dxyzdrst
951
954 subroutine coef_generate_geo(c)
955 type(coef_t), intent(inout) :: c
956 integer :: e, i, lxyz, ntot
957
958 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
959 ntot = c%dof%size()
960
961 if (neko_bcknd_device .eq. 1) then
962
963 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
964 c%G22_d, c%G23_d, c%G33_d, &
965 c%drdx_d, c%drdy_d, c%drdz_d, &
966 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
967 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
968 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
969 c%Xh%lx, c%msh%gdim)
970
971 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync = .false.)
972 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync = .false.)
973 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync = .false.)
974 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync = .false.)
975 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync = .false.)
976 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync = .true.)
977
978 else
979 if (c%msh%gdim .eq. 2) then
980
981 do i = 1, ntot
982 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
983 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
984
985 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
986 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
987
988 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
989 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
990 end do
991
992 do i = 1, ntot
993 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
994 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
995 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
996 c%G33(i, 1, 1, 1) = 0.0_rp
997 c%G13(i, 1, 1, 1) = 0.0_rp
998 c%G23(i, 1, 1, 1) = 0.0_rp
999 end do
1000
1001 do concurrent(e = 1:c%msh%nelv)
1002 do concurrent(i = 1:lxyz)
1003 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
1004 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
1005 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
1006 end do
1007 end do
1008
1009 else
1010 !$omp parallel private(i)
1011 !$omp do
1012 do i = 1, ntot
1013 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
1014 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
1015 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
1016
1017 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
1018 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
1019 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
1020
1021 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
1022 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
1023 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
1024 end do
1025 !$omp end do
1026 !$omp do
1027 do i = 1, ntot
1028 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1029 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1030 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1031 end do
1032 !$omp end do
1033 !$omp do
1034 do i = 1, ntot
1035 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
1036 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
1037 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
1038
1039 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
1040 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
1041 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
1042
1043 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
1044 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
1045 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
1046 end do
1047 !$omp end do
1048 !$omp do
1049 do i = 1, ntot
1050 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1051 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1052 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
1053 end do
1054 !$omp end do
1055 !$omp do
1056 do e = 1, c%msh%nelv
1057 do concurrent(i = 1:lxyz)
1058 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
1059 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
1060 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
1061
1062 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
1063 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
1064 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
1065 end do
1066 end do
1067 !$omp end do
1068 !$omp end parallel
1069 end if
1070 end if
1071
1072 end subroutine coef_generate_geo
1073
1077 type(coef_t), intent(inout) :: c
1078 integer :: e, i, lxyz, ntot
1079
1080 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
1081 ntot = c%dof%size()
1082
1084 do concurrent(e = 1:c%msh%nelv)
1085 ! Here we need to handle things differently for axis symmetric elements
1086 do concurrent(i = 1:lxyz)
1087 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
1088 c%Binv(i,1,1,e) = c%B(i,1,1,e)
1089 end do
1090 end do
1091
1092 if (neko_bcknd_device .eq. 1) then
1093 call device_memcpy(c%B, c%B_d, ntot, host_to_device, &
1094 sync = .false.)
1095 call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, &
1096 sync = .false.)
1097 end if
1098
1099 call c%gs_h%op(c%Binv, ntot, gs_op_add)
1100
1101 if (neko_bcknd_device .eq. 1) then
1102 call device_invcol1(c%Binv_d, ntot)
1103 call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, &
1104 sync = .true.)
1105 else
1106 call invcol1(c%Binv, ntot)
1107 end if
1108
1110 if (neko_bcknd_device .eq. 1) then
1111 c%volume = device_glsum(c%B_d, ntot)
1112 else
1113 c%volume = glsum(c%B, ntot)
1114 end if
1115
1116 end subroutine coef_generate_mass
1117
1118 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
1119 class(coef_t), intent(in) :: this
1120 integer, intent(in) :: i, j, k, e, facet
1121 real(kind=rp) :: normal(3)
1122
1123 select case (facet)
1124 case(1,2)
1125 normal(1) = this%nx(j, k, facet, e)
1126 normal(2) = this%ny(j, k, facet, e)
1127 normal(3) = this%nz(j, k, facet, e)
1128 case(3,4)
1129 normal(1) = this%nx(i, k, facet, e)
1130 normal(2) = this%ny(i, k, facet, e)
1131 normal(3) = this%nz(i, k, facet, e)
1132 case(5,6)
1133 normal(1) = this%nx(i, j, facet, e)
1134 normal(2) = this%ny(i, j, facet, e)
1135 normal(3) = this%nz(i, j, facet, e)
1136 end select
1137 end function coef_get_normal
1138
1139 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1140 class(coef_t), intent(in) :: this
1141 integer, intent(in) :: i, j, k, e, facet
1142 real(kind=rp) :: area
1143
1144 select case (facet)
1145 case(1,2)
1146 area = this%area(j, k, facet, e)
1147 case(3,4)
1148 area = this%area(i, k, facet, e)
1149 case(5,6)
1150 area = this%area(i, j, facet, e)
1151 end select
1152 end function coef_get_area
1153
1154
1157 type(coef_t), intent(inout) :: coef
1158 real(kind=rp), allocatable :: a(:,:,:,:)
1159 real(kind=rp), allocatable :: b(:,:,:,:)
1160 real(kind=rp), allocatable :: c(:,:,:,:)
1161 real(kind=rp), allocatable :: dot(:,:,:,:)
1162 integer :: n, e, i, j, k, lx
1163 real(kind=rp) :: weight, len
1164 n = coef%dof%size()
1165 lx = coef%Xh%lx
1166
1167 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1168 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1169 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1170 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1171
1172 !$omp parallel private (e, i, j, k, weight, len)
1173
1174 ! ds x dt
1175 !$omp do simd
1176 do i = 1, n
1177 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1178 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1179
1180 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1181 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1182
1183 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1184 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1185 end do
1186 !$omp end do simd
1187 !$omp do simd
1188 do i = 1, n
1189 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1190 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1191 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1192 end do
1193 !$omp end do simd
1194 !$omp do
1195 do e = 1, coef%msh%nelv
1196 do concurrent(k = 1:coef%Xh%lx)
1197 do concurrent(j = 1:coef%Xh%lx)
1198 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1199 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1200 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1201 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1202 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1203 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1204 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1205 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1206 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1207 end do
1208 end do
1209 end do
1210 !$omp end do
1211
1212 ! dr x dt
1213 !$omp do simd
1214 do i = 1, n
1215 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1216 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1217
1218 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1219 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1220
1221 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1222 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1223 end do
1224 !$omp end do simd
1225 !$omp do simd
1226 do i = 1, n
1227 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1228 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1229 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1230 end do
1231 !$omp end do simd
1232 !$omp do
1233 do e = 1, coef%msh%nelv
1234 do concurrent(k = 1:coef%Xh%lx)
1235 do concurrent(j = 1:coef%Xh%lx)
1236 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1237 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1238 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1239 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1240 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1241 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1242 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1243 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1244 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1245 end do
1246 end do
1247 end do
1248 !$omp end do
1249 ! dr x ds
1250 !$omp do simd
1251 do i = 1, n
1252 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1253 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1254
1255 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1256 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1257
1258 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1259 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1260 end do
1261 !$omp end do simd
1262 !$omp do simd
1263 do i = 1, n
1264 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1265 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1266 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1267 end do
1268 !$omp end do simd
1269 !$omp do
1270 do e = 1, coef%msh%nelv
1271 do concurrent(k = 1:coef%Xh%lx)
1272 do concurrent(j = 1:coef%Xh%lx)
1273 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1274 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1275 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1276 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1277 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1278 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1279 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1280 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1281 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1282 end do
1283 end do
1284 end do
1285 !$omp end do
1286 ! Normalize
1287 !$omp do
1288 do j = 1, size(coef%nz)
1289 len = sqrt(coef%nx(j,1,1,1)**2 + &
1290 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1291 if (len .gt. neko_eps) then
1292 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1293 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1294 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1295 end if
1296 end do
1297 !$omp end do
1298 !$omp end parallel
1299
1300 deallocate(dot)
1301 deallocate(c)
1302 deallocate(b)
1303 deallocate(a)
1305 if (neko_bcknd_device .eq. 1) then
1306 n = size(coef%area)
1307 call device_memcpy(coef%area, coef%area_d, n, &
1308 host_to_device, sync = .false.)
1309 call device_memcpy(coef%nx, coef%nx_d, n, &
1310 host_to_device, sync = .false.)
1311 call device_memcpy(coef%ny, coef%ny_d, n, &
1312 host_to_device, sync = .false.)
1313 call device_memcpy(coef%nz, coef%nz_d, n, &
1314 host_to_device, sync = .false.)
1315 end if
1316
1317 end subroutine coef_generate_area_and_normal
1318
1320 class(coef_t), intent(inout) :: this
1321 real(kind=rp) :: un(3), len, d
1322 integer :: lx, ly, lz, np, np_glb, ierr
1323 integer :: i, j, k, pf, pe, n, nc, ncyc
1324
1325 if (.not. this%cyclic) return
1326
1327 np = this%msh%periodic%size
1328 call mpi_allreduce(np, np_glb, 1, &
1329 mpi_integer, mpi_sum, neko_comm, ierr)
1330
1331 if (np_glb .eq. 0) then
1332 call neko_error("There are no periodic boundaries. " // &
1333 "Switch cyclic off in the case file.")
1334 end if
1335
1336 if (np .eq. 0) return
1337
1338 lx = this%Xh%lx
1339 ly = this%Xh%ly
1340 lz = this%Xh%lz
1341 ncyc = this%cyc_msk(0) - 1
1342 nc = 1
1343 do n = 1, np
1344 pf = this%msh%periodic%facet_el(n)%x(1)
1345 pe = this%msh%periodic%facet_el(n)%x(2)
1346 do k = 1, lz
1347 do j = 1, ly
1348 do i = 1, lx
1349 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1350 un = this%get_normal(i, j, k, pe, pf)
1351 len = sqrt(un(1) * un(1) + un(2) * un(2))
1352 if (len .gt. neko_eps) then
1353 d = this%dof%y(i, j, k, pe) * un(1) &
1354 - this%dof%x(i, j, k, pe) * un(2)
1355
1356 this%cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1357 this%R11(nc) = un(1) / len * sign(1.0_rp, d)
1358 this%R12(nc) = un(2) / len * sign(1.0_rp, d)
1359 nc = nc + 1
1360 else
1361 call neko_error("x and y components of surface " // &
1362 "normals are zero. Cyclic rotations must be " // &
1363 "around z-axis.")
1364 end if
1365 end if
1366 end do
1367 end do
1368 end do
1369 end do
1370
1371 if (nc - 1 /= ncyc) then
1372 call neko_error("The number of cyclic GLL points were " // &
1373 "not estimated correctly.")
1374 end if
1375
1376 if (neko_bcknd_device .eq. 1) then
1377 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, &
1378 host_to_device, sync = .false.)
1379 call device_memcpy(this%R11, this%R11_d, ncyc, &
1380 host_to_device, sync = .false.)
1381 call device_memcpy(this%R12, this%R12_d, ncyc, &
1382 host_to_device, sync = .false.)
1383 end if
1384
1385 end subroutine coef_generate_cyclic_bc
1386
1387
1389 subroutine coef_recompute_metrics(this)
1390 class(coef_t), intent(inout) :: this
1391
1392 call coef_generate_dxyzdrst(this)
1393 call coef_generate_geo(this)
1395 call coef_generate_mass(this)
1396 if (this%cyclic) then
1397 call coef_generate_cyclic_bc(this)
1398 end if
1399 end subroutine coef_recompute_metrics
1400
1401
1405 class(coef_t), intent(inout), target :: this
1406 integer :: n
1407
1408 ! Return if already allocated distinctly
1409 if (.not. associated(this%Blag, this%B)) return
1410
1411 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
1412
1413
1414 nullify(this%Blag)
1415 nullify(this%Blaglag)
1416
1417 allocate(this%Blag(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
1418 allocate(this%Blaglag(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
1419
1420 this%Blag = this%B
1421 this%Blaglag = this%B
1422
1423 if (neko_bcknd_device .eq. 1) then
1424
1425 this%Blag_d = c_null_ptr
1426 this%Blaglag_d = c_null_ptr
1427
1428 call device_map(this%Blag, this%Blag_d, n)
1429 call device_map(this%Blaglag, this%Blaglag_d, n)
1430
1431 call device_memcpy(this%Blag, this%Blag_d, n, &
1432 host_to_device, sync = .false.)
1433 call device_memcpy(this%Blaglag, this%Blaglag_d, n, &
1434 host_to_device, sync = .true.)
1435 end if
1436
1437 end subroutine coef_enable_lagged_mass
1438
1439
1442 class(coef_t), intent(inout), target :: this
1443 integer :: n
1444 integer(c_size_t) :: n_bytes
1445
1446 ! If this%Blag does not have separate memory, we don't need to update it.
1447 if (associated(this%Blag, this%B)) return
1448
1449 this%Blaglag = this%Blag
1450 this%Blag = this%B
1451
1452 if (neko_bcknd_device .eq. 1) then
1453 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
1454 if (rp .eq. real32) then
1455 n_bytes = int(n, c_size_t) * 4_c_size_t
1456 else
1457 n_bytes = int(n, c_size_t) * 8_c_size_t
1458 end if
1459
1460 call device_memcpy(this%Blaglag_d, this%Blag_d, n_bytes, &
1461 device_to_device, sync = .false.)
1462
1463 call device_memcpy(this%Blag_d, this%B_d, n_bytes, &
1464 device_to_device, sync = .true.)
1465 end if
1466
1467 end subroutine coef_update_lagged_mass
1468
1469end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:955
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:425
subroutine coef_recompute_metrics(this)
Recompute and update geometric factors (ALE)
Definition coef.f90:1390
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1140
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:1119
subroutine coef_update_lagged_mass(this)
Update history: Blaglag = Blag, Blag = B.
Definition coef.f90:1442
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:769
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1157
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:182
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:228
subroutine coef_enable_lagged_mass(this)
Enable separate memory for lagged B matrices if needed. For eg. when mesh moves.
Definition coef.f90:1405
subroutine coef_generate_cyclic_bc(this)
Definition coef.f90:1320
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:1077
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:640
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:882
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:239
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:500
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:959
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:615
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:580
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:250
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:206
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
pure logical function, public index_is_on_facet(i, j, k, lx, ly, lz, facet)
Definition utils.f90:262
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:62
The function space for the SEM solution fields.
Definition space.f90:63