Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only: space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
48 use mxm_wrapper, only : mxm
49 use device
52 use comm, only : neko_comm
54 use mpi_f08, only : mpi_allreduce, mpi_integer, mpi_sum
55 use, intrinsic :: iso_c_binding
56 implicit none
57 private
58
61 type, public :: coef_t
63 real(kind=rp), allocatable :: g11(:,:,:,:)
65 real(kind=rp), allocatable :: g22(:,:,:,:)
67 real(kind=rp), allocatable :: g33(:,:,:,:)
69 real(kind=rp), allocatable :: g12(:,:,:,:)
71 real(kind=rp), allocatable :: g13(:,:,:,:)
73 real(kind=rp), allocatable :: g23(:,:,:,:)
74
75 real(kind=rp), allocatable :: mult(:,:,:,:)
80 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
81 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
82 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
86 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
87 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
88 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
89
90 real(kind=rp), allocatable :: h1(:,:,:,:)
91 real(kind=rp), allocatable :: h2(:,:,:,:)
92 logical :: ifh2
93
94 real(kind=rp), allocatable :: jac(:,:,:,:)
95 real(kind=rp), allocatable :: jacinv(:,:,:,:)
96 real(kind=rp), allocatable :: b(:,:,:,:)
97 real(kind=rp), allocatable :: binv(:,:,:,:)
98
99 real(kind=rp), allocatable :: area(:,:,:,:)
100 real(kind=rp), allocatable :: nx(:,:,:,:)
101 real(kind=rp), allocatable :: ny(:,:,:,:)
102 real(kind=rp), allocatable :: nz(:,:,:,:)
103 logical :: cyclic = .false.
104 integer, allocatable :: cyc_msk(:)
105 real(kind=rp), allocatable :: r11(:)
106 real(kind=rp), allocatable :: r12(:)
108
109 real(kind=rp) :: volume
110
111 type(space_t), pointer :: xh => null()
112 type(mesh_t), pointer :: msh => null()
113 type(dofmap_t), pointer :: dof => null()
114 type(gs_t), pointer :: gs_h=> null()
115
116 !
117 ! Device pointers (if present)
118 !
119
120 type(c_ptr) :: g11_d = c_null_ptr
121 type(c_ptr) :: g22_d = c_null_ptr
122 type(c_ptr) :: g33_d = c_null_ptr
123 type(c_ptr) :: g12_d = c_null_ptr
124 type(c_ptr) :: g13_d = c_null_ptr
125 type(c_ptr) :: g23_d = c_null_ptr
126 type(c_ptr) :: dxdr_d = c_null_ptr
127 type(c_ptr) :: dydr_d = c_null_ptr
128 type(c_ptr) :: dzdr_d = c_null_ptr
129 type(c_ptr) :: dxds_d = c_null_ptr
130 type(c_ptr) :: dyds_d = c_null_ptr
131 type(c_ptr) :: dzds_d = c_null_ptr
132 type(c_ptr) :: dxdt_d = c_null_ptr
133 type(c_ptr) :: dydt_d = c_null_ptr
134 type(c_ptr) :: dzdt_d = c_null_ptr
135 type(c_ptr) :: drdx_d = c_null_ptr
136 type(c_ptr) :: drdy_d = c_null_ptr
137 type(c_ptr) :: drdz_d = c_null_ptr
138 type(c_ptr) :: dsdx_d = c_null_ptr
139 type(c_ptr) :: dsdy_d = c_null_ptr
140 type(c_ptr) :: dsdz_d = c_null_ptr
141 type(c_ptr) :: dtdx_d = c_null_ptr
142 type(c_ptr) :: dtdy_d = c_null_ptr
143 type(c_ptr) :: dtdz_d = c_null_ptr
144 type(c_ptr) :: mult_d = c_null_ptr
145 type(c_ptr) :: h1_d = c_null_ptr
146 type(c_ptr) :: h2_d = c_null_ptr
147 type(c_ptr) :: jac_d = c_null_ptr
148 type(c_ptr) :: jacinv_d = c_null_ptr
149 type(c_ptr) :: b_d = c_null_ptr
150 type(c_ptr) :: binv_d = c_null_ptr
151 type(c_ptr) :: area_d = c_null_ptr
152 type(c_ptr) :: nx_d = c_null_ptr
153 type(c_ptr) :: ny_d = c_null_ptr
154 type(c_ptr) :: nz_d = c_null_ptr
155 type(c_ptr) :: cyc_msk_d = c_null_ptr
156 type(c_ptr) :: r11_d = c_null_ptr
157 type(c_ptr) :: r12_d = c_null_ptr
158
159
160
161 contains
162 procedure, private, pass(this) :: init_empty => coef_init_empty
163 procedure, private, pass(this) :: init_all => coef_init_all
164 procedure, pass(this) :: free => coef_free
165 procedure, pass(this) :: get_normal => coef_get_normal
166 procedure, pass(this) :: get_area => coef_get_area
167 procedure, pass(this) :: generate_cyclic_bc => coef_generate_cyclic_bc
168 generic :: init => init_empty, init_all
169 end type coef_t
170
171contains
172
174 subroutine coef_init_empty(this, Xh, msh)
175 class(coef_t), intent(inout) :: this
176 type(space_t), intent(inout), target :: Xh
177 type(mesh_t), intent(inout), target :: msh
178 integer :: n
179 call this%free()
180 this%msh => msh
181 this%Xh => xh
182
183 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
184 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
185 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
186
187 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
188 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
189 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
190
191 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
192 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
193 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
194
195
196 !
197 ! Setup device memory (if present)
198 !
199
200 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
201 if (neko_bcknd_device .eq. 1) then
202
203 call device_map(this%drdx, this%drdx_d, n)
204 call device_map(this%drdy, this%drdy_d, n)
205 call device_map(this%drdz, this%drdz_d, n)
206
207 call device_map(this%dsdx, this%dsdx_d, n)
208 call device_map(this%dsdy, this%dsdy_d, n)
209 call device_map(this%dsdz, this%dsdz_d, n)
210
211 call device_map(this%dtdx, this%dtdx_d, n)
212 call device_map(this%dtdy, this%dtdy_d, n)
213 call device_map(this%dtdz, this%dtdz_d, n)
214
215 end if
216
217 end subroutine coef_init_empty
218
220 subroutine coef_init_all(this, gs_h)
221 class(coef_t), intent(inout) :: this
222 type(gs_t), intent(inout), target :: gs_h
223 integer :: n, m, ncyc
224 call this%free()
225
226 this%msh => gs_h%dofmap%msh
227 this%Xh => gs_h%dofmap%Xh
228 this%dof => gs_h%dofmap
229 this%gs_h => gs_h
230
231 !
232 ! Allocate arrays for geometric data
233 !
235 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
236 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
237 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
238 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
239 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
240 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
241
242 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
243 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
244 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245
246 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
247 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249
250 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
251 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253
254 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
255 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
256 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
257
258 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
259 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261
262 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
265
266 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
267 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
268
269 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
270 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
271 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
272 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
273
274 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
275 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
276
277 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
278 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
279
280 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
281
282
283 !
284 ! Setup device memory (if present)
285 !
286
287 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
288 if (neko_bcknd_device .eq. 1) then
289 call device_map(this%G11, this%G11_d, n)
290 call device_map(this%G22, this%G22_d, n)
291 call device_map(this%G33, this%G33_d, n)
292 call device_map(this%G12, this%G12_d, n)
293 call device_map(this%G13, this%G13_d, n)
294 call device_map(this%G23, this%G23_d, n)
295
296 call device_map(this%dxdr, this%dxdr_d, n)
297 call device_map(this%dydr, this%dydr_d, n)
298 call device_map(this%dzdr, this%dzdr_d, n)
299
300 call device_map(this%dxds, this%dxds_d, n)
301 call device_map(this%dyds, this%dyds_d, n)
302 call device_map(this%dzds, this%dzds_d, n)
303
304 call device_map(this%dxdt, this%dxdt_d, n)
305 call device_map(this%dydt, this%dydt_d, n)
306 call device_map(this%dzdt, this%dzdt_d, n)
307
308 call device_map(this%drdx, this%drdx_d, n)
309 call device_map(this%drdy, this%drdy_d, n)
310 call device_map(this%drdz, this%drdz_d, n)
311
312 call device_map(this%dsdx, this%dsdx_d, n)
313 call device_map(this%dsdy, this%dsdy_d, n)
314 call device_map(this%dsdz, this%dsdz_d, n)
315
316 call device_map(this%dtdx, this%dtdx_d, n)
317 call device_map(this%dtdy, this%dtdy_d, n)
318 call device_map(this%dtdz, this%dtdz_d, n)
319
320 call device_map(this%mult, this%mult_d, n)
321 call device_map(this%h1, this%h1_d, n)
322 call device_map(this%h2, this%h2_d, n)
323
324 call device_map(this%jac, this%jac_d, n)
325 call device_map(this%jacinv, this%jacinv_d, n)
326 call device_map(this%B, this%B_d, n)
327 call device_map(this%Binv, this%Binv_d, n)
328
329 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
330
331 call device_map(this%area, this%area_d, m)
332 call device_map(this%nx, this%nx_d, m)
333 call device_map(this%ny, this%ny_d, m)
334 call device_map(this%nz, this%nz_d, m)
335
336 end if
337
338 call coef_generate_dxyzdrst(this)
339
340 call coef_generate_geo(this)
341
343
344 call coef_generate_mass(this)
345
346
347 ! This is a placeholder, just for now
348 ! We can probably find a prettier solution
349 if (neko_bcknd_device .eq. 1) then
350 call device_rone(this%h1_d, n)
351 call device_rone(this%h2_d, n)
352 call device_memcpy(this%h1, this%h1_d, n, &
353 device_to_host, sync=.false.)
354 call device_memcpy(this%h2, this%h2_d, n, &
355 device_to_host, sync=.false.)
356 else
357 call rone(this%h1,n)
358 call rone(this%h2,n)
359 end if
360
361 this%ifh2 = .false.
362
363 !
364 ! Set up multiplicity
365 !
366 if (neko_bcknd_device .eq. 1) then
367 call device_rone(this%mult_d, n)
368 else
369 call rone(this%mult, n)
370 end if
371
372 call gs_h%op(this%mult, n, gs_op_add)
373
374 if (neko_bcknd_device .eq. 1) then
375 call device_invcol1(this%mult_d, n)
376 call device_memcpy(this%mult, this%mult_d, n, &
377 device_to_host, sync=.true.)
378 else
379 call invcol1(this%mult, n)
380 end if
381
382 ncyc = this%msh%periodic%size * this%Xh%lx * this%Xh%lx
383 allocate(this%cyc_msk(0:ncyc))
384 this%cyc_msk(0) = ncyc + 1
385 if (ncyc .gt. 0) then
386 allocate(this%R11(ncyc))
387 allocate(this%R12(ncyc))
388
390 call rone(this%R11, ncyc)
391 call rzero(this%R12, ncyc)
392
393 if (neko_bcknd_device .eq. 1) then
394 call device_map(this%cyc_msk, this%cyc_msk_d, ncyc+1)
395 call device_map(this%R11, this%R11_d, ncyc)
396 call device_map(this%R12, this%R12_d, ncyc)
397
398 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, host_to_device, sync=.false.)
399 call device_memcpy(this%R11, this%R11_d, ncyc, host_to_device, sync=.false.)
400 call device_memcpy(this%R12, this%R12_d, ncyc, host_to_device, sync=.false.)
401 end if
402
403 end if
404 end subroutine coef_init_all
405
407 subroutine coef_free(this)
408 class(coef_t), intent(inout) :: this
409
410 if (allocated(this%G11)) then
411 deallocate(this%G11)
412 end if
413
414 if (allocated(this%G22)) then
415 deallocate(this%G22)
416 end if
417
418 if (allocated(this%G33)) then
419 deallocate(this%G33)
420 end if
421
422 if (allocated(this%G12)) then
423 deallocate(this%G12)
424 end if
425
426 if (allocated(this%G13)) then
427 deallocate(this%G13)
428 end if
429
430 if (allocated(this%G23)) then
431 deallocate(this%G23)
432 end if
433
434 if (allocated(this%mult)) then
435 deallocate(this%mult)
436 end if
437
438 if (allocated(this%B)) then
439 deallocate(this%B)
440 end if
441
442 if (allocated(this%Binv)) then
443 deallocate(this%Binv)
444 end if
445
446 if(allocated(this%dxdr)) then
447 deallocate(this%dxdr)
448 end if
449
450 if(allocated(this%dxds)) then
451 deallocate(this%dxds)
452 end if
453
454 if(allocated(this%dxdt)) then
455 deallocate(this%dxdt)
456 end if
457
458 if(allocated(this%dydr)) then
459 deallocate(this%dydr)
460 end if
461
462 if(allocated(this%dyds)) then
463 deallocate(this%dyds)
464 end if
465
466 if(allocated(this%dydt)) then
467 deallocate(this%dydt)
468 end if
469
470 if(allocated(this%dzdr)) then
471 deallocate(this%dzdr)
472 end if
473
474 if(allocated(this%dzds)) then
475 deallocate(this%dzds)
476 end if
477
478 if(allocated(this%dzdt)) then
479 deallocate(this%dzdt)
480 end if
481
482 if(allocated(this%drdx)) then
483 deallocate(this%drdx)
484 end if
485
486 if(allocated(this%dsdx)) then
487 deallocate(this%dsdx)
488 end if
489
490 if(allocated(this%dtdx)) then
491 deallocate(this%dtdx)
492 end if
493
494 if(allocated(this%drdy)) then
495 deallocate(this%drdy)
496 end if
497
498 if(allocated(this%dsdy)) then
499 deallocate(this%dsdy)
500 end if
501
502 if(allocated(this%dtdy)) then
503 deallocate(this%dtdy)
504 end if
505
506 if(allocated(this%drdz)) then
507 deallocate(this%drdz)
508 end if
509
510 if(allocated(this%dsdz)) then
511 deallocate(this%dsdz)
512 end if
513
514 if(allocated(this%dtdz)) then
515 deallocate(this%dtdz)
516 end if
517
518 if(allocated(this%jac)) then
519 deallocate(this%jac)
520 end if
521
522 if(allocated(this%jacinv)) then
523 deallocate(this%jacinv)
524 end if
525
526 if(allocated(this%h1)) then
527 deallocate(this%h1)
528 end if
529
530 if(allocated(this%h2)) then
531 deallocate(this%h2)
532 end if
533
534 if (allocated(this%area)) then
535 deallocate(this%area)
536 end if
537
538 if (allocated(this%nx)) then
539 deallocate(this%nx)
540 end if
541
542 if (allocated(this%ny)) then
543 deallocate(this%ny)
544 end if
545
546 if (allocated(this%nz)) then
547 deallocate(this%nz)
548 end if
549
550 if (allocated(this%cyc_msk)) then
551 deallocate(this%cyc_msk)
552 end if
553
554 if (allocated(this%R11)) then
555 deallocate(this%R11)
556 end if
557
558 if (allocated(this%R12)) then
559 deallocate(this%R12)
560 end if
561
562
563 nullify(this%msh)
564 nullify(this%Xh)
565 nullify(this%dof)
566 nullify(this%gs_h)
567
568 !
569 ! Cleanup the device (if present)
570 !
571
572 if (c_associated(this%G11_d)) then
573 call device_free(this%G11_d)
574 end if
575
576 if (c_associated(this%G22_d)) then
577 call device_free(this%G22_d)
578 end if
579
580 if (c_associated(this%G33_d)) then
581 call device_free(this%G33_d)
582 end if
583
584 if (c_associated(this%G12_d)) then
585 call device_free(this%G12_d)
586 end if
587
588 if (c_associated(this%G13_d)) then
589 call device_free(this%G13_d)
590 end if
591
592 if (c_associated(this%G23_d)) then
593 call device_free(this%G23_d)
594 end if
595
596 if (c_associated(this%dxdr_d)) then
597 call device_free(this%dxdr_d)
598 end if
599
600 if (c_associated(this%dydr_d)) then
601 call device_free(this%dydr_d)
602 end if
603
604 if (c_associated(this%dzdr_d)) then
605 call device_free(this%dzdr_d)
606 end if
607
608 if (c_associated(this%dxds_d)) then
609 call device_free(this%dxds_d)
610 end if
611
612 if (c_associated(this%dyds_d)) then
613 call device_free(this%dyds_d)
614 end if
615
616 if (c_associated(this%dzds_d)) then
617 call device_free(this%dzds_d)
618 end if
619
620 if (c_associated(this%dxdt_d)) then
621 call device_free(this%dxdt_d)
622 end if
623
624 if (c_associated(this%dydt_d)) then
625 call device_free(this%dydt_d)
626 end if
627
628 if (c_associated(this%dzdt_d)) then
629 call device_free(this%dzdt_d)
630 end if
631
632 if (c_associated(this%drdx_d)) then
633 call device_free(this%drdx_d)
634 end if
635
636 if (c_associated(this%drdy_d)) then
637 call device_free(this%drdy_d)
638 end if
639
640 if (c_associated(this%drdz_d)) then
641 call device_free(this%drdz_d)
642 end if
643
644 if (c_associated(this%dsdx_d)) then
645 call device_free(this%dsdx_d)
646 end if
647
648 if (c_associated(this%dsdy_d)) then
649 call device_free(this%dsdy_d)
650 end if
651
652 if (c_associated(this%dsdz_d)) then
653 call device_free(this%dsdz_d)
654 end if
655
656 if (c_associated(this%dtdx_d)) then
657 call device_free(this%dtdx_d)
658 end if
659
660 if (c_associated(this%dtdy_d)) then
661 call device_free(this%dtdy_d)
662 end if
663
664 if (c_associated(this%dtdz_d)) then
665 call device_free(this%dtdz_d)
666 end if
667
668 if (c_associated(this%mult_d)) then
669 call device_free(this%mult_d)
670 end if
671
672 if (c_associated(this%h1_d)) then
673 call device_free(this%h1_d)
674 end if
675
676 if (c_associated(this%h2_d)) then
677 call device_free(this%h2_d)
678 end if
679
680 if (c_associated(this%jac_d)) then
681 call device_free(this%jac_d)
682 end if
683
684 if (c_associated(this%jacinv_d)) then
685 call device_free(this%jacinv_d)
686 end if
687
688 if (c_associated(this%B_d)) then
689 call device_free(this%B_d)
690 end if
691
692 if (c_associated(this%Binv_d)) then
693 call device_free(this%Binv_d)
694 end if
695
696 if (c_associated(this%area_d)) then
697 call device_free(this%area_d)
698 end if
699
700 if (c_associated(this%nx_d)) then
701 call device_free(this%nx_d)
702 end if
703
704 if (c_associated(this%ny_d)) then
705 call device_free(this%ny_d)
706 end if
707
708 if (c_associated(this%nz_d)) then
709 call device_free(this%nz_d)
710 end if
711
712 if (c_associated(this%cyc_msk_d)) then
713 call device_free(this%cyc_msk_d)
714 end if
715
716 if (c_associated(this%R11_d)) then
717 call device_free(this%R11_d)
718 end if
719
720 if (c_associated(this%R12_d)) then
721 call device_free(this%R12_d)
722 end if
723
724
725 end subroutine coef_free
726
728 type(coef_t), intent(inout) :: c
729 integer :: e,i,lxy,lyz,ntot
730
731 lxy=c%Xh%lx*c%Xh%ly
732 lyz=c%Xh%ly*c%Xh%lz
733 ntot = c%dof%size()
734
735 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
736 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
737 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
738 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
739 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
740 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
741 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
742 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
743 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
744 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
745 jacinv => c%jacinv, jac => c%jac)
746
747 if (neko_bcknd_device .eq. 1) then
748
749 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
750 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
751 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
752 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
753 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
754 c%Xh%lx, c%msh%nelv)
755
756 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, sync=.false.)
757 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, sync=.false.)
758 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, sync=.false.)
759 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, sync=.false.)
760 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, sync=.false.)
761 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, sync=.false.)
762 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, sync=.false.)
763 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, sync=.false.)
764 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, sync=.false.)
765 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, sync=.false.)
766 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, sync=.false.)
767 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, sync=.false.)
768 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, sync=.false.)
769 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, sync=.false.)
770 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, sync=.false.)
771 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, sync=.false.)
772 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, sync=.false.)
773 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, sync=.false.)
774 call device_memcpy(jac, c%jac_d, ntot, device_to_host, sync=.false.)
775 call device_memcpy(jacinv, c%jacinv_d, ntot, &
776 device_to_host, sync=.true.)
777
778 else
779 !$omp parallel do private(i)
780 do e = 1, c%msh%nelv
781 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
782 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
783 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
784
785 do i = 1, lz
786 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
787 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
788 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
789 end do
790
791 ! We actually take 2d into account, wow, need to do that for the rest.
792 if(c%msh%gdim .eq. 3) then
793 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
794 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
795 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
796 else
797 call rzero(dxdt(1,1,1,e), lxy)
798 call rzero(dydt(1,1,1,e), lxy)
799 call rone(dzdt(1,1,1,e), lxy)
800 end if
801 end do
802 !$omp end parallel do
803
804 if (c%msh%gdim .eq. 2) then
805 call rzero (jac, ntot)
806 call addcol3 (jac, dxdr, dyds, ntot)
807 call subcol3 (jac, dxds, dydr, ntot)
808 call copy (drdx, dyds, ntot)
809 call copy (drdy, dxds, ntot)
810 call chsign (drdy, ntot)
811 call copy (dsdx, dydr, ntot)
812 call chsign (dsdx, ntot)
813 call copy (dsdy, dxdr, ntot)
814 call rzero (drdz, ntot)
815 call rzero (dsdz, ntot)
816 call rone (dtdz, ntot)
817 else
818 !$omp parallel private(i)
819 !$omp do
820 do i = 1, ntot
821 c%jac(i, 1, 1, 1) = 0.0_rp
822 end do
823 !$omp end do
824 !$omp do
825 do i = 1, ntot
826 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
827 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
828
829 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
830 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
831
832 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
833 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
834 end do
835 !$omp end do
836 !$omp do
837 do i = 1, ntot
838 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
839 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
840
841 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
842 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
843
844 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
845 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
846 end do
847 !$omp end do
848 !$omp do
849 do i = 1, ntot
850 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
851 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
852
853 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
854 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
855
856 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
857 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
858 end do
859 !$omp end do
860 !$omp do
861 do i = 1, ntot
862 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
863 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
864
865 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
866 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
867
868 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
869 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
870 end do
871 !$omp end do
872 !$omp do
873 do i = 1, ntot
874 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
875 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
876
877 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
878 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
879
880 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
881 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
882 end do
883 !$omp end do
884 !$omp end parallel
885 end if
886 call invers2(jacinv, jac, ntot)
887 end if
888 end associate
889
890 end subroutine coef_generate_dxyzdrst
891
894 subroutine coef_generate_geo(c)
895 type(coef_t), intent(inout) :: c
896 integer :: e, i, lxyz, ntot
897
898 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
899 ntot = c%dof%size()
900
901 if (neko_bcknd_device .eq. 1) then
902
903 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
904 c%G22_d, c%G23_d, c%G33_d, &
905 c%drdx_d, c%drdy_d, c%drdz_d, &
906 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
907 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
908 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
909 c%Xh%lx, c%msh%gdim)
910
911 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
912 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
913 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
914 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
915 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
916 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
917
918 else
919 if(c%msh%gdim .eq. 2) then
920
921 do i = 1, ntot
922 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
923 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
924
925 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
926 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
927
928 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
929 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
930 end do
931
932 do i = 1, ntot
933 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
934 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
935 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
936 c%G33(i, 1, 1, 1) = 0.0_rp
937 c%G13(i, 1, 1, 1) = 0.0_rp
938 c%G23(i, 1, 1, 1) = 0.0_rp
939 end do
940
941 do concurrent(e = 1:c%msh%nelv)
942 do concurrent(i = 1:lxyz)
943 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
944 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
945 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
946 end do
947 end do
948
949 else
950 !$omp parallel private(i)
951 !$omp do
952 do i = 1, ntot
953 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
954 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
955 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
956
957 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
958 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
959 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
960
961 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
962 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
963 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
964 end do
965 !$omp end do
966 !$omp do
967 do i = 1, ntot
968 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
969 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
970 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
971 end do
972 !$omp end do
973 !$omp do
974 do i = 1, ntot
975 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
976 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
977 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
978
979 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
980 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
981 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
982
983 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
984 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
985 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
986 end do
987 !$omp end do
988 !$omp do
989 do i = 1, ntot
990 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
991 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
992 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
993 end do
994 !$omp end do
995 !$omp do
996 do e = 1, c%msh%nelv
997 do concurrent(i = 1:lxyz)
998 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
999 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
1000 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
1001
1002 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
1003 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
1004 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
1005 end do
1006 end do
1007 !$omp end do
1008 !$omp end parallel
1009 end if
1010 end if
1011
1012 end subroutine coef_generate_geo
1013
1017 type(coef_t), intent(inout) :: c
1018 integer :: e, i, lxyz, ntot
1019
1020 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
1021 ntot = c%dof%size()
1022
1024 do concurrent(e = 1:c%msh%nelv)
1025 ! Here we need to handle things differently for axis symmetric elements
1026 do concurrent(i = 1:lxyz)
1027 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
1028 c%Binv(i,1,1,e) = c%B(i,1,1,e)
1029 end do
1030 end do
1031
1032 if (neko_bcknd_device .eq. 1) then
1033 call device_memcpy(c%B, c%B_d, ntot, host_to_device, sync=.false.)
1034 call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, sync=.false.)
1035 end if
1036
1037 call c%gs_h%op(c%Binv, ntot, gs_op_add)
1038
1039 if (neko_bcknd_device .eq. 1) then
1040 call device_invcol1(c%Binv_d, ntot)
1041 call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, sync=.true.)
1042 else
1043 call invcol1(c%Binv, ntot)
1044 end if
1045
1047 if (neko_bcknd_device .eq. 1) then
1048 c%volume = device_glsum(c%B_d, ntot)
1049 else
1050 c%volume = glsum(c%B, ntot)
1051 end if
1052
1053 end subroutine coef_generate_mass
1054
1055 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
1056 class(coef_t), intent(in) :: this
1057 integer, intent(in) :: i, j, k, e, facet
1058 real(kind=rp) :: normal(3)
1059
1060 select case (facet)
1061 case(1,2)
1062 normal(1) = this%nx(j, k, facet, e)
1063 normal(2) = this%ny(j, k, facet, e)
1064 normal(3) = this%nz(j, k, facet, e)
1065 case(3,4)
1066 normal(1) = this%nx(i, k, facet, e)
1067 normal(2) = this%ny(i, k, facet, e)
1068 normal(3) = this%nz(i, k, facet, e)
1069 case(5,6)
1070 normal(1) = this%nx(i, j, facet, e)
1071 normal(2) = this%ny(i, j, facet, e)
1072 normal(3) = this%nz(i, j, facet, e)
1073 end select
1074 end function coef_get_normal
1075
1076 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1077 class(coef_t), intent(in) :: this
1078 integer, intent(in) :: i, j, k, e, facet
1079 real(kind=rp) :: area
1080
1081 select case (facet)
1082 case(1,2)
1083 area = this%area(j, k, facet, e)
1084 case(3,4)
1085 area = this%area(i, k, facet, e)
1086 case(5,6)
1087 area = this%area(i, j, facet, e)
1088 end select
1089 end function coef_get_area
1090
1091
1094 type(coef_t), intent(inout) :: coef
1095 real(kind=rp), allocatable :: a(:,:,:,:)
1096 real(kind=rp), allocatable :: b(:,:,:,:)
1097 real(kind=rp), allocatable :: c(:,:,:,:)
1098 real(kind=rp), allocatable :: dot(:,:,:,:)
1099 integer :: n, e, i, j, k, lx
1100 real(kind=rp) :: weight, len
1101 n = coef%dof%size()
1102 lx = coef%Xh%lx
1103
1104 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1105 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1106 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1107 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1108
1109 ! ds x dt
1110 do i = 1, n
1111 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1112 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1113
1114 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1115 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1116
1117 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1118 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1119 end do
1120
1121 do i = 1, n
1122 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1123 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1124 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1125 end do
1126
1127 do concurrent(e = 1:coef%msh%nelv)
1128 do concurrent(k = 1:coef%Xh%lx)
1129 do concurrent(j = 1:coef%Xh%lx)
1130 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1131 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1132 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1133 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1134 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1135 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1136 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1137 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1138 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1139 end do
1140 end do
1141 end do
1142
1143 ! dr x dt
1144 do i = 1, n
1145 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1146 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1147
1148 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1149 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1150
1151 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1152 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1153 end do
1154
1155 do i = 1, n
1156 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1157 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1158 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1159 end do
1160
1161 do concurrent(e = 1:coef%msh%nelv)
1162 do concurrent(k = 1:coef%Xh%lx)
1163 do concurrent(j = 1:coef%Xh%lx)
1164 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1165 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1166 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1167 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1168 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1169 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1170 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1171 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1172 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1173 end do
1174 end do
1175 end do
1176
1177 ! dr x ds
1178 do i = 1, n
1179 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1180 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1181
1182 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1183 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1184
1185 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1186 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1187 end do
1188
1189 do i = 1, n
1190 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1191 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1192 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1193 end do
1194
1195 do concurrent(e = 1:coef%msh%nelv)
1196 do concurrent(k = 1:coef%Xh%lx)
1197 do concurrent(j = 1:coef%Xh%lx)
1198 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1199 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1200 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1201 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1202 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1203 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1204 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1205 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1206 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1207 end do
1208 end do
1209 end do
1210
1211 ! Normalize
1212 do j = 1, size(coef%nz)
1213 len = sqrt(coef%nx(j,1,1,1)**2 + &
1214 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1215 if (len .gt. neko_eps) then
1216 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1217 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1218 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1219 end if
1220 end do
1221
1222 deallocate(dot)
1223 deallocate(c)
1224 deallocate(b)
1225 deallocate(a)
1227 if (neko_bcknd_device .eq. 1) then
1228 n = size(coef%area)
1229 call device_memcpy(coef%area, coef%area_d, n, &
1230 host_to_device, sync=.false.)
1231 call device_memcpy(coef%nx, coef%nx_d, n, &
1232 host_to_device, sync=.false.)
1233 call device_memcpy(coef%ny, coef%ny_d, n, &
1234 host_to_device, sync=.false.)
1235 call device_memcpy(coef%nz, coef%nz_d, n, &
1236 host_to_device, sync=.false.)
1237 end if
1238
1239 end subroutine coef_generate_area_and_normal
1240
1242 class(coef_t), intent(inout) :: this
1243 real(kind=rp) :: un(3), len, d
1244 integer :: lx, ly, lz, np, np_glb, ierr
1245 integer :: i, j, k, pf, pe, n, nc, ncyc
1246
1247 if (.not. this%cyclic) return
1248
1249 np = this%msh%periodic%size
1250 call mpi_allreduce(np, np_glb, 1, &
1251 mpi_integer, mpi_sum, neko_comm, ierr)
1252
1253 if (np_glb .eq. 0) then
1254 call neko_error("There are no periodic boundaries. " // &
1255 "Switch cyclic off in the case file.")
1256 end if
1257
1258 if (np .eq. 0) return
1259
1260 lx = this%Xh%lx
1261 ly = this%Xh%ly
1262 lz = this%Xh%lz
1263 ncyc = this%cyc_msk(0) - 1
1264 nc = 1
1265 do n = 1, np
1266 pf = this%msh%periodic%facet_el(n)%x(1)
1267 pe = this%msh%periodic%facet_el(n)%x(2)
1268 do k = 1, lz
1269 do j = 1, ly
1270 do i = 1, lx
1271 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1272 un = this%get_normal(i, j, k, pe, pf)
1273 len = sqrt(un(1) * un(1) + un(2) * un(2))
1274 if (len .gt. neko_eps) then
1275 d = this%dof%y(i, j, k, pe) * un(1) &
1276 - this%dof%x(i, j, k, pe) * un(2)
1277
1278 this%cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1279 this%R11(nc) = un(1) / len * sign(1.0_rp, d)
1280 this%R12(nc) = un(2) / len * sign(1.0_rp, d)
1281 nc = nc + 1
1282 else
1283 call neko_error("x and y components of surface " // &
1284 "normals are zero. Cyclic rotations must be " // &
1285 "around z-axis.")
1286 end if
1287 end if
1288 end do
1289 end do
1290 end do
1291 end do
1292
1293 if (nc - 1 /= ncyc) then
1294 call neko_error("The number of cyclic GLL points were " // &
1295 "not estimated correctly.")
1296 end if
1297
1298 if (neko_bcknd_device .eq. 1) then
1299 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, host_to_device, sync=.false.)
1300 call device_memcpy(this%R11, this%R11_d, ncyc, host_to_device, sync=.false.)
1301 call device_memcpy(this%R12, this%R12_d, ncyc, host_to_device, sync=.false.)
1302 end if
1303
1304 end subroutine coef_generate_cyclic_bc
1305
1306end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:895
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:408
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1077
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:1056
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:728
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1094
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:175
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:221
subroutine coef_generate_cyclic_bc(this)
Definition coef.f90:1242
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:1017
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:225
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:639
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:881
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:958
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:614
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:579
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
pure logical function, public index_is_on_facet(i, j, k, lx, ly, lz, facet)
Definition utils.f90:262
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:61
The function space for the SEM solution fields.
Definition space.f90:63