Neko 1.0.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only: space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
48 use mxm_wrapper, only : mxm
49 use device
52 use comm, only : neko_comm
54 use mpi_f08, only : mpi_allreduce, mpi_integer, mpi_sum
55 use, intrinsic :: iso_c_binding
56 implicit none
57 private
58
61 type, public :: coef_t
63 real(kind=rp), allocatable :: g11(:,:,:,:)
65 real(kind=rp), allocatable :: g22(:,:,:,:)
67 real(kind=rp), allocatable :: g33(:,:,:,:)
69 real(kind=rp), allocatable :: g12(:,:,:,:)
71 real(kind=rp), allocatable :: g13(:,:,:,:)
73 real(kind=rp), allocatable :: g23(:,:,:,:)
74
75 real(kind=rp), allocatable :: mult(:,:,:,:)
80 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
81 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
82 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
86 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
87 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
88 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
89
90 real(kind=rp), allocatable :: h1(:,:,:,:)
91 real(kind=rp), allocatable :: h2(:,:,:,:)
92 logical :: ifh2
93
94 real(kind=rp), allocatable :: jac(:,:,:,:)
95 real(kind=rp), allocatable :: jacinv(:,:,:,:)
96 real(kind=rp), allocatable :: b(:,:,:,:)
97 real(kind=rp), allocatable :: binv(:,:,:,:)
98
99 real(kind=rp), allocatable :: area(:,:,:,:)
100 real(kind=rp), allocatable :: nx(:,:,:,:)
101 real(kind=rp), allocatable :: ny(:,:,:,:)
102 real(kind=rp), allocatable :: nz(:,:,:,:)
103 logical :: cyclic = .false.
104 integer, allocatable :: cyc_msk(:)
105 real(kind=rp), allocatable :: r11(:)
106 real(kind=rp), allocatable :: r12(:)
108
109 real(kind=rp) :: volume
110
111 type(space_t), pointer :: xh => null()
112 type(mesh_t), pointer :: msh => null()
113 type(dofmap_t), pointer :: dof => null()
114 type(gs_t), pointer :: gs_h=> null()
115
116 !
117 ! Device pointers (if present)
118 !
119
120 type(c_ptr) :: g11_d = c_null_ptr
121 type(c_ptr) :: g22_d = c_null_ptr
122 type(c_ptr) :: g33_d = c_null_ptr
123 type(c_ptr) :: g12_d = c_null_ptr
124 type(c_ptr) :: g13_d = c_null_ptr
125 type(c_ptr) :: g23_d = c_null_ptr
126 type(c_ptr) :: dxdr_d = c_null_ptr
127 type(c_ptr) :: dydr_d = c_null_ptr
128 type(c_ptr) :: dzdr_d = c_null_ptr
129 type(c_ptr) :: dxds_d = c_null_ptr
130 type(c_ptr) :: dyds_d = c_null_ptr
131 type(c_ptr) :: dzds_d = c_null_ptr
132 type(c_ptr) :: dxdt_d = c_null_ptr
133 type(c_ptr) :: dydt_d = c_null_ptr
134 type(c_ptr) :: dzdt_d = c_null_ptr
135 type(c_ptr) :: drdx_d = c_null_ptr
136 type(c_ptr) :: drdy_d = c_null_ptr
137 type(c_ptr) :: drdz_d = c_null_ptr
138 type(c_ptr) :: dsdx_d = c_null_ptr
139 type(c_ptr) :: dsdy_d = c_null_ptr
140 type(c_ptr) :: dsdz_d = c_null_ptr
141 type(c_ptr) :: dtdx_d = c_null_ptr
142 type(c_ptr) :: dtdy_d = c_null_ptr
143 type(c_ptr) :: dtdz_d = c_null_ptr
144 type(c_ptr) :: mult_d = c_null_ptr
145 type(c_ptr) :: h1_d = c_null_ptr
146 type(c_ptr) :: h2_d = c_null_ptr
147 type(c_ptr) :: jac_d = c_null_ptr
148 type(c_ptr) :: jacinv_d = c_null_ptr
149 type(c_ptr) :: b_d = c_null_ptr
150 type(c_ptr) :: binv_d = c_null_ptr
151 type(c_ptr) :: area_d = c_null_ptr
152 type(c_ptr) :: nx_d = c_null_ptr
153 type(c_ptr) :: ny_d = c_null_ptr
154 type(c_ptr) :: nz_d = c_null_ptr
155 type(c_ptr) :: cyc_msk_d = c_null_ptr
156 type(c_ptr) :: r11_d = c_null_ptr
157 type(c_ptr) :: r12_d = c_null_ptr
158
159
160
161 contains
162 procedure, private, pass(this) :: init_empty => coef_init_empty
163 procedure, private, pass(this) :: init_all => coef_init_all
164 procedure, pass(this) :: free => coef_free
165 procedure, pass(this) :: get_normal => coef_get_normal
166 procedure, pass(this) :: get_area => coef_get_area
167 procedure, pass(this) :: generate_cyclic_bc => coef_generate_cyclic_bc
168 generic :: init => init_empty, init_all
169 end type coef_t
170
171contains
172
174 subroutine coef_init_empty(this, Xh, msh)
175 class(coef_t), intent(inout) :: this
176 type(space_t), intent(inout), target :: Xh
177 type(mesh_t), intent(inout), target :: msh
178 integer :: n
179 call this%free()
180 this%msh => msh
181 this%Xh => xh
182
183 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
184 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
185 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
186
187 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
188 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
189 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
190
191 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
192 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
193 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
194
195
196 !
197 ! Setup device memory (if present)
198 !
199
200 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
201 if (neko_bcknd_device .eq. 1) then
202
203 call device_map(this%drdx, this%drdx_d, n)
204 call device_map(this%drdy, this%drdy_d, n)
205 call device_map(this%drdz, this%drdz_d, n)
206
207 call device_map(this%dsdx, this%dsdx_d, n)
208 call device_map(this%dsdy, this%dsdy_d, n)
209 call device_map(this%dsdz, this%dsdz_d, n)
210
211 call device_map(this%dtdx, this%dtdx_d, n)
212 call device_map(this%dtdy, this%dtdy_d, n)
213 call device_map(this%dtdz, this%dtdz_d, n)
214
215 end if
216
217 end subroutine coef_init_empty
218
220 subroutine coef_init_all(this, gs_h)
221 class(coef_t), intent(inout) :: this
222 type(gs_t), intent(inout), target :: gs_h
223 integer :: n, m, ncyc
224 call this%free()
225
226 this%msh => gs_h%dofmap%msh
227 this%Xh => gs_h%dofmap%Xh
228 this%dof => gs_h%dofmap
229 this%gs_h => gs_h
230
231 !
232 ! Allocate arrays for geometric data
233 !
235 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
236 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
237 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
238 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
239 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
240 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
241
242 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
243 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
244 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245
246 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
247 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249
250 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
251 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253
254 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
255 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
256 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
257
258 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
259 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261
262 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
265
266 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
267 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
268
269 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
270 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
271 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
272 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
273
274 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
275 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
276
277 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
278 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
279
280 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
281
282
283 !
284 ! Setup device memory (if present)
285 !
286
287 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
288 if (neko_bcknd_device .eq. 1) then
289 call device_map(this%G11, this%G11_d, n)
290 call device_map(this%G22, this%G22_d, n)
291 call device_map(this%G33, this%G33_d, n)
292 call device_map(this%G12, this%G12_d, n)
293 call device_map(this%G13, this%G13_d, n)
294 call device_map(this%G23, this%G23_d, n)
295
296 call device_map(this%dxdr, this%dxdr_d, n)
297 call device_map(this%dydr, this%dydr_d, n)
298 call device_map(this%dzdr, this%dzdr_d, n)
299
300 call device_map(this%dxds, this%dxds_d, n)
301 call device_map(this%dyds, this%dyds_d, n)
302 call device_map(this%dzds, this%dzds_d, n)
303
304 call device_map(this%dxdt, this%dxdt_d, n)
305 call device_map(this%dydt, this%dydt_d, n)
306 call device_map(this%dzdt, this%dzdt_d, n)
307
308 call device_map(this%drdx, this%drdx_d, n)
309 call device_map(this%drdy, this%drdy_d, n)
310 call device_map(this%drdz, this%drdz_d, n)
311
312 call device_map(this%dsdx, this%dsdx_d, n)
313 call device_map(this%dsdy, this%dsdy_d, n)
314 call device_map(this%dsdz, this%dsdz_d, n)
315
316 call device_map(this%dtdx, this%dtdx_d, n)
317 call device_map(this%dtdy, this%dtdy_d, n)
318 call device_map(this%dtdz, this%dtdz_d, n)
319
320 call device_map(this%mult, this%mult_d, n)
321 call device_map(this%h1, this%h1_d, n)
322 call device_map(this%h2, this%h2_d, n)
323
324 call device_map(this%jac, this%jac_d, n)
325 call device_map(this%jacinv, this%jacinv_d, n)
326 call device_map(this%B, this%B_d, n)
327 call device_map(this%Binv, this%Binv_d, n)
328
329 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
330
331 call device_map(this%area, this%area_d, m)
332 call device_map(this%nx, this%nx_d, m)
333 call device_map(this%ny, this%ny_d, m)
334 call device_map(this%nz, this%nz_d, m)
335
336 end if
337
338 call coef_generate_dxyzdrst(this)
339
340 call coef_generate_geo(this)
341
343
344 call coef_generate_mass(this)
345
346
347 ! This is a placeholder, just for now
348 ! We can probably find a prettier solution
349 if (neko_bcknd_device .eq. 1) then
350 call device_rone(this%h1_d, n)
351 call device_rone(this%h2_d, n)
352 call device_memcpy(this%h1, this%h1_d, n, &
353 device_to_host, sync=.false.)
354 call device_memcpy(this%h2, this%h2_d, n, &
355 device_to_host, sync=.false.)
356 else
357 call rone(this%h1,n)
358 call rone(this%h2,n)
359 end if
360
361 this%ifh2 = .false.
362
363 !
364 ! Set up multiplicity
365 !
366 if (neko_bcknd_device .eq. 1) then
367 call device_rone(this%mult_d, n)
368 else
369 call rone(this%mult, n)
370 end if
371
372 call gs_h%op(this%mult, n, gs_op_add)
373
374 if (neko_bcknd_device .eq. 1) then
375 call device_invcol1(this%mult_d, n)
376 call device_memcpy(this%mult, this%mult_d, n, &
377 device_to_host, sync=.true.)
378 else
379 call invcol1(this%mult, n)
380 end if
381
382 ncyc = this%msh%periodic%size * this%Xh%lx * this%Xh%lx
383 allocate(this%cyc_msk(0:ncyc))
384 this%cyc_msk(0) = ncyc + 1
385 if (ncyc .gt. 0) then
386 allocate(this%R11(ncyc))
387 allocate(this%R12(ncyc))
388
390 call rone(this%R11, ncyc)
391 call rzero(this%R12, ncyc)
392
393 if (neko_bcknd_device .eq. 1) then
394 call device_map(this%cyc_msk, this%cyc_msk_d, ncyc+1)
395 call device_map(this%R11, this%R11_d, ncyc)
396 call device_map(this%R12, this%R12_d, ncyc)
397
398 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, host_to_device, sync=.false.)
399 call device_memcpy(this%R11, this%R11_d, ncyc, host_to_device, sync=.false.)
400 call device_memcpy(this%R12, this%R12_d, ncyc, host_to_device, sync=.false.)
401 end if
402
403 end if
404 end subroutine coef_init_all
405
407 subroutine coef_free(this)
408 class(coef_t), intent(inout) :: this
409
410 if (allocated(this%G11)) then
411 deallocate(this%G11)
412 end if
413
414 if (allocated(this%G22)) then
415 deallocate(this%G22)
416 end if
417
418 if (allocated(this%G33)) then
419 deallocate(this%G33)
420 end if
421
422 if (allocated(this%G12)) then
423 deallocate(this%G12)
424 end if
425
426 if (allocated(this%G13)) then
427 deallocate(this%G13)
428 end if
429
430 if (allocated(this%G23)) then
431 deallocate(this%G23)
432 end if
433
434 if (allocated(this%mult)) then
435 deallocate(this%mult)
436 end if
437
438 if (allocated(this%B)) then
439 deallocate(this%B)
440 end if
441
442 if (allocated(this%Binv)) then
443 deallocate(this%Binv)
444 end if
445
446 if(allocated(this%dxdr)) then
447 deallocate(this%dxdr)
448 end if
449
450 if(allocated(this%dxds)) then
451 deallocate(this%dxds)
452 end if
453
454 if(allocated(this%dxdt)) then
455 deallocate(this%dxdt)
456 end if
457
458 if(allocated(this%dydr)) then
459 deallocate(this%dydr)
460 end if
461
462 if(allocated(this%dyds)) then
463 deallocate(this%dyds)
464 end if
465
466 if(allocated(this%dydt)) then
467 deallocate(this%dydt)
468 end if
469
470 if(allocated(this%dzdr)) then
471 deallocate(this%dzdr)
472 end if
473
474 if(allocated(this%dzds)) then
475 deallocate(this%dzds)
476 end if
477
478 if(allocated(this%dzdt)) then
479 deallocate(this%dzdt)
480 end if
481
482 if(allocated(this%drdx)) then
483 deallocate(this%drdx)
484 end if
485
486 if(allocated(this%dsdx)) then
487 deallocate(this%dsdx)
488 end if
489
490 if(allocated(this%dtdx)) then
491 deallocate(this%dtdx)
492 end if
493
494 if(allocated(this%drdy)) then
495 deallocate(this%drdy)
496 end if
497
498 if(allocated(this%dsdy)) then
499 deallocate(this%dsdy)
500 end if
501
502 if(allocated(this%dtdy)) then
503 deallocate(this%dtdy)
504 end if
505
506 if(allocated(this%drdz)) then
507 deallocate(this%drdz)
508 end if
509
510 if(allocated(this%dsdz)) then
511 deallocate(this%dsdz)
512 end if
513
514 if(allocated(this%dtdz)) then
515 deallocate(this%dtdz)
516 end if
517
518 if(allocated(this%jac)) then
519 deallocate(this%jac)
520 end if
521
522 if(allocated(this%jacinv)) then
523 deallocate(this%jacinv)
524 end if
525
526 if(allocated(this%h1)) then
527 deallocate(this%h1)
528 end if
529
530 if(allocated(this%h2)) then
531 deallocate(this%h2)
532 end if
533
534 if (allocated(this%area)) then
535 deallocate(this%area)
536 end if
537
538 if (allocated(this%nx)) then
539 deallocate(this%nx)
540 end if
541
542 if (allocated(this%ny)) then
543 deallocate(this%ny)
544 end if
545
546 if (allocated(this%nz)) then
547 deallocate(this%nz)
548 end if
549
550 if (allocated(this%cyc_msk)) then
551 deallocate(this%cyc_msk)
552 end if
553
554 if (allocated(this%R11)) then
555 deallocate(this%R11)
556 end if
557
558 if (allocated(this%R12)) then
559 deallocate(this%R12)
560 end if
561
562
563 nullify(this%msh)
564 nullify(this%Xh)
565 nullify(this%dof)
566 nullify(this%gs_h)
567
568 !
569 ! Cleanup the device (if present)
570 !
571
572 if (c_associated(this%G11_d)) then
573 call device_free(this%G11_d)
574 end if
575
576 if (c_associated(this%G22_d)) then
577 call device_free(this%G22_d)
578 end if
579
580 if (c_associated(this%G33_d)) then
581 call device_free(this%G33_d)
582 end if
583
584 if (c_associated(this%G12_d)) then
585 call device_free(this%G12_d)
586 end if
587
588 if (c_associated(this%G13_d)) then
589 call device_free(this%G13_d)
590 end if
591
592 if (c_associated(this%G23_d)) then
593 call device_free(this%G23_d)
594 end if
595
596 if (c_associated(this%dxdr_d)) then
597 call device_free(this%dxdr_d)
598 end if
599
600 if (c_associated(this%dydr_d)) then
601 call device_free(this%dydr_d)
602 end if
603
604 if (c_associated(this%dzdr_d)) then
605 call device_free(this%dzdr_d)
606 end if
607
608 if (c_associated(this%dxds_d)) then
609 call device_free(this%dxds_d)
610 end if
611
612 if (c_associated(this%dyds_d)) then
613 call device_free(this%dyds_d)
614 end if
615
616 if (c_associated(this%dzds_d)) then
617 call device_free(this%dzds_d)
618 end if
619
620 if (c_associated(this%dxdt_d)) then
621 call device_free(this%dxdt_d)
622 end if
623
624 if (c_associated(this%dydt_d)) then
625 call device_free(this%dydt_d)
626 end if
627
628 if (c_associated(this%dzdt_d)) then
629 call device_free(this%dzdt_d)
630 end if
631
632 if (c_associated(this%drdx_d)) then
633 call device_free(this%drdx_d)
634 end if
635
636 if (c_associated(this%drdy_d)) then
637 call device_free(this%drdy_d)
638 end if
639
640 if (c_associated(this%drdz_d)) then
641 call device_free(this%drdz_d)
642 end if
643
644 if (c_associated(this%dsdx_d)) then
645 call device_free(this%dsdx_d)
646 end if
647
648 if (c_associated(this%dsdy_d)) then
649 call device_free(this%dsdy_d)
650 end if
651
652 if (c_associated(this%dsdz_d)) then
653 call device_free(this%dsdz_d)
654 end if
655
656 if (c_associated(this%dtdx_d)) then
657 call device_free(this%dtdx_d)
658 end if
659
660 if (c_associated(this%dtdy_d)) then
661 call device_free(this%dtdy_d)
662 end if
663
664 if (c_associated(this%dtdz_d)) then
665 call device_free(this%dtdz_d)
666 end if
667
668 if (c_associated(this%mult_d)) then
669 call device_free(this%mult_d)
670 end if
671
672 if (c_associated(this%h1_d)) then
673 call device_free(this%h1_d)
674 end if
675
676 if (c_associated(this%h2_d)) then
677 call device_free(this%h2_d)
678 end if
679
680 if (c_associated(this%jac_d)) then
681 call device_free(this%jac_d)
682 end if
683
684 if (c_associated(this%jacinv_d)) then
685 call device_free(this%jacinv_d)
686 end if
687
688 if (c_associated(this%B_d)) then
689 call device_free(this%B_d)
690 end if
691
692 if (c_associated(this%Binv_d)) then
693 call device_free(this%Binv_d)
694 end if
695
696 if (c_associated(this%area_d)) then
697 call device_free(this%area_d)
698 end if
699
700 if (c_associated(this%nx_d)) then
701 call device_free(this%nx_d)
702 end if
703
704 if (c_associated(this%ny_d)) then
705 call device_free(this%ny_d)
706 end if
707
708 if (c_associated(this%nz_d)) then
709 call device_free(this%nz_d)
710 end if
711
712 if (c_associated(this%cyc_msk_d)) then
713 call device_free(this%cyc_msk_d)
714 end if
715
716 if (c_associated(this%R11_d)) then
717 call device_free(this%R11_d)
718 end if
719
720 if (c_associated(this%R12_d)) then
721 call device_free(this%R12_d)
722 end if
723
724
725 end subroutine coef_free
726
728 type(coef_t), intent(inout) :: c
729 integer :: e,i,lxy,lyz,ntot
730
731 lxy=c%Xh%lx*c%Xh%ly
732 lyz=c%Xh%ly*c%Xh%lz
733 ntot = c%dof%size()
734
735 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
736 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
737 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
738 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
739 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
740 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
741 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
742 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
743 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
744 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
745 jacinv => c%jacinv, jac => c%jac)
746
747 if (neko_bcknd_device .eq. 1) then
748
749 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
750 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
751 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
752 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
753 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
754 c%Xh%lx, c%msh%nelv)
755
756 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, sync=.false.)
757 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, sync=.false.)
758 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, sync=.false.)
759 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, sync=.false.)
760 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, sync=.false.)
761 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, sync=.false.)
762 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, sync=.false.)
763 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, sync=.false.)
764 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, sync=.false.)
765 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, sync=.false.)
766 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, sync=.false.)
767 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, sync=.false.)
768 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, sync=.false.)
769 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, sync=.false.)
770 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, sync=.false.)
771 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, sync=.false.)
772 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, sync=.false.)
773 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, sync=.false.)
774 call device_memcpy(jac, c%jac_d, ntot, device_to_host, sync=.false.)
775 call device_memcpy(jacinv, c%jacinv_d, ntot, &
776 device_to_host, sync=.true.)
777
778 else
779 do e = 1, c%msh%nelv
780 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
781 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
782 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
783
784 do i = 1, lz
785 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
786 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
787 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
788 end do
789
790 ! We actually take 2d into account, wow, need to do that for the rest.
791 if(c%msh%gdim .eq. 3) then
792 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
793 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
794 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
795 else
796 call rzero(dxdt(1,1,1,e), lxy)
797 call rzero(dydt(1,1,1,e), lxy)
798 call rone(dzdt(1,1,1,e), lxy)
799 end if
800 end do
801
802 if (c%msh%gdim .eq. 2) then
803 call rzero (jac, ntot)
804 call addcol3 (jac, dxdr, dyds, ntot)
805 call subcol3 (jac, dxds, dydr, ntot)
806 call copy (drdx, dyds, ntot)
807 call copy (drdy, dxds, ntot)
808 call chsign (drdy, ntot)
809 call copy (dsdx, dydr, ntot)
810 call chsign (dsdx, ntot)
811 call copy (dsdy, dxdr, ntot)
812 call rzero (drdz, ntot)
813 call rzero (dsdz, ntot)
814 call rone (dtdz, ntot)
815 else
816
817 do i = 1, ntot
818 c%jac(i, 1, 1, 1) = 0.0_rp
819 end do
820
821 do i = 1, ntot
822 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
823 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
824
825 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
826 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
827
828 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
829 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
830 end do
831
832 do i = 1, ntot
833 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
834 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
835
836 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
837 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
838
839 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
840 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
841 end do
842
843 do i = 1, ntot
844 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
845 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
846
847 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
848 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
849
850 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
851 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
852 end do
853
854 do i = 1, ntot
855 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
856 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
857
858 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
859 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
860
861 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
862 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
863 end do
864
865 do i = 1, ntot
866 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
867 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
868
869 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
870 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
871
872 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
873 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
874 end do
875
876 end if
877 call invers2(jacinv, jac, ntot)
878 end if
879 end associate
880
881 end subroutine coef_generate_dxyzdrst
882
885 subroutine coef_generate_geo(c)
886 type(coef_t), intent(inout) :: c
887 integer :: e, i, lxyz, ntot
888
889 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
890 ntot = c%dof%size()
891
892 if (neko_bcknd_device .eq. 1) then
893
894 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
895 c%G22_d, c%G23_d, c%G33_d, &
896 c%drdx_d, c%drdy_d, c%drdz_d, &
897 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
898 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
899 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
900 c%Xh%lx, c%msh%gdim)
901
902 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
903 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
904 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
905 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
906 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
907 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
908
909 else
910 if(c%msh%gdim .eq. 2) then
911
912 do i = 1, ntot
913 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
914 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
915
916 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
917 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
918
919 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
920 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
921 end do
922
923 do i = 1, ntot
924 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
925 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
926 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
927 c%G33(i, 1, 1, 1) = 0.0_rp
928 c%G13(i, 1, 1, 1) = 0.0_rp
929 c%G23(i, 1, 1, 1) = 0.0_rp
930 end do
931
932 do concurrent(e = 1:c%msh%nelv)
933 do concurrent(i = 1:lxyz)
934 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
935 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
936 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
937 end do
938 end do
939
940 else
941
942 do i = 1, ntot
943 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
944 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
945 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
946
947 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
948 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
949 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
950
951 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
952 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
953 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
954 end do
955
956 do i = 1, ntot
957 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
958 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
959 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
960 end do
961
962 do i = 1, ntot
963 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
964 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
965 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
966
967 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
968 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
969 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
970
971 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
972 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
973 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
974 end do
975
976 do i = 1, ntot
977 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
978 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
979 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
980 end do
981
982 do concurrent(e = 1:c%msh%nelv)
983 do concurrent(i = 1:lxyz)
984 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
985 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
986 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
987
988 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
989 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
990 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
991 end do
992 end do
993
994 end if
995 end if
996
997 end subroutine coef_generate_geo
998
1002 type(coef_t), intent(inout) :: c
1003 integer :: e, i, lxyz, ntot
1004
1005 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
1006 ntot = c%dof%size()
1007
1009 do concurrent(e = 1:c%msh%nelv)
1010 ! Here we need to handle things differently for axis symmetric elements
1011 do concurrent(i = 1:lxyz)
1012 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
1013 c%Binv(i,1,1,e) = c%B(i,1,1,e)
1014 end do
1015 end do
1016
1017 if (neko_bcknd_device .eq. 1) then
1018 call device_memcpy(c%B, c%B_d, ntot, host_to_device, sync=.false.)
1019 call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, sync=.false.)
1020 end if
1021
1022 call c%gs_h%op(c%Binv, ntot, gs_op_add)
1023
1024 if (neko_bcknd_device .eq. 1) then
1025 call device_invcol1(c%Binv_d, ntot)
1026 call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, sync=.true.)
1027 else
1028 call invcol1(c%Binv, ntot)
1029 end if
1030
1032 if (neko_bcknd_device .eq. 1) then
1033 c%volume = device_glsum(c%B_d, ntot)
1034 else
1035 c%volume = glsum(c%B, ntot)
1036 end if
1037
1038 end subroutine coef_generate_mass
1039
1040 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
1041 class(coef_t), intent(in) :: this
1042 integer, intent(in) :: i, j, k, e, facet
1043 real(kind=rp) :: normal(3)
1044
1045 select case (facet)
1046 case(1,2)
1047 normal(1) = this%nx(j, k, facet, e)
1048 normal(2) = this%ny(j, k, facet, e)
1049 normal(3) = this%nz(j, k, facet, e)
1050 case(3,4)
1051 normal(1) = this%nx(i, k, facet, e)
1052 normal(2) = this%ny(i, k, facet, e)
1053 normal(3) = this%nz(i, k, facet, e)
1054 case(5,6)
1055 normal(1) = this%nx(i, j, facet, e)
1056 normal(2) = this%ny(i, j, facet, e)
1057 normal(3) = this%nz(i, j, facet, e)
1058 end select
1059 end function coef_get_normal
1060
1061 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1062 class(coef_t), intent(in) :: this
1063 integer, intent(in) :: i, j, k, e, facet
1064 real(kind=rp) :: area
1065
1066 select case (facet)
1067 case(1,2)
1068 area = this%area(j, k, facet, e)
1069 case(3,4)
1070 area = this%area(i, k, facet, e)
1071 case(5,6)
1072 area = this%area(i, j, facet, e)
1073 end select
1074 end function coef_get_area
1075
1076
1079 type(coef_t), intent(inout) :: coef
1080 real(kind=rp), allocatable :: a(:,:,:,:)
1081 real(kind=rp), allocatable :: b(:,:,:,:)
1082 real(kind=rp), allocatable :: c(:,:,:,:)
1083 real(kind=rp), allocatable :: dot(:,:,:,:)
1084 integer :: n, e, i, j, k, lx
1085 real(kind=rp) :: weight, len
1086 n = coef%dof%size()
1087 lx = coef%Xh%lx
1088
1089 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1090 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1091 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1092 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1093
1094 ! ds x dt
1095 do i = 1, n
1096 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1097 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1098
1099 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1100 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1101
1102 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1103 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1104 end do
1105
1106 do i = 1, n
1107 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1108 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1109 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1110 end do
1111
1112 do concurrent(e = 1:coef%msh%nelv)
1113 do concurrent(k = 1:coef%Xh%lx)
1114 do concurrent(j = 1:coef%Xh%lx)
1115 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1116 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1117 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1118 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1119 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1120 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1121 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1122 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1123 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1124 end do
1125 end do
1126 end do
1127
1128 ! dr x dt
1129 do i = 1, n
1130 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1131 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1132
1133 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1134 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1135
1136 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1137 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1138 end do
1139
1140 do i = 1, n
1141 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1142 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1143 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1144 end do
1145
1146 do concurrent(e = 1:coef%msh%nelv)
1147 do concurrent(k = 1:coef%Xh%lx)
1148 do concurrent(j = 1:coef%Xh%lx)
1149 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1150 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1151 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1152 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1153 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1154 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1155 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1156 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1157 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1158 end do
1159 end do
1160 end do
1161
1162 ! dr x ds
1163 do i = 1, n
1164 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1165 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1166
1167 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1168 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1169
1170 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1171 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1172 end do
1173
1174 do i = 1, n
1175 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1176 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1177 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1178 end do
1179
1180 do concurrent(e = 1:coef%msh%nelv)
1181 do concurrent(k = 1:coef%Xh%lx)
1182 do concurrent(j = 1:coef%Xh%lx)
1183 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1184 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1185 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1186 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1187 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1188 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1189 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1190 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1191 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1192 end do
1193 end do
1194 end do
1195
1196 ! Normalize
1197 do j = 1, size(coef%nz)
1198 len = sqrt(coef%nx(j,1,1,1)**2 + &
1199 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1200 if (len .gt. neko_eps) then
1201 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1202 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1203 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1204 end if
1205 end do
1206
1207 deallocate(dot)
1208 deallocate(c)
1209 deallocate(b)
1210 deallocate(a)
1212 if (neko_bcknd_device .eq. 1) then
1213 n = size(coef%area)
1214 call device_memcpy(coef%area, coef%area_d, n, &
1215 host_to_device, sync=.false.)
1216 call device_memcpy(coef%nx, coef%nx_d, n, &
1217 host_to_device, sync=.false.)
1218 call device_memcpy(coef%ny, coef%ny_d, n, &
1219 host_to_device, sync=.false.)
1220 call device_memcpy(coef%nz, coef%nz_d, n, &
1221 host_to_device, sync=.false.)
1222 end if
1223
1224 end subroutine coef_generate_area_and_normal
1225
1227 class(coef_t), intent(inout) :: this
1228 real(kind=rp) :: un(3), len, d
1229 integer :: lx, ly, lz, np, np_glb, ierr
1230 integer :: i, j, k, pf, pe, n, nc, ncyc
1231
1232 if (.not. this%cyclic) return
1233
1234 np = this%msh%periodic%size
1235 call mpi_allreduce(np, np_glb, 1, &
1236 mpi_integer, mpi_sum, neko_comm, ierr)
1237
1238 if (np_glb .eq. 0) then
1239 call neko_error("There are no periodic boundaries. " // &
1240 "Switch cyclic off in the case file.")
1241 end if
1242
1243 if (np .eq. 0) return
1244
1245 lx = this%Xh%lx
1246 ly = this%Xh%ly
1247 lz = this%Xh%lz
1248 ncyc = this%cyc_msk(0) - 1
1249 nc = 1
1250 do n = 1, np
1251 pf = this%msh%periodic%facet_el(n)%x(1)
1252 pe = this%msh%periodic%facet_el(n)%x(2)
1253 do k = 1, lz
1254 do j = 1, ly
1255 do i = 1, lx
1256 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1257 un = this%get_normal(i, j, k, pe, pf)
1258 len = sqrt(un(1) * un(1) + un(2) * un(2))
1259 if (len .gt. neko_eps) then
1260 d = this%dof%y(i, j, k, pe) * un(1) &
1261 - this%dof%x(i, j, k, pe) * un(2)
1262
1263 this%cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1264 this%R11(nc) = un(1) / len * sign(1.0_rp, d)
1265 this%R12(nc) = un(2) / len * sign(1.0_rp, d)
1266 nc = nc + 1
1267 else
1268 call neko_error("x and y components of surface " // &
1269 "normals are zero. Cyclic rotations must be " // &
1270 "around z-axis.")
1271 end if
1272 end if
1273 end do
1274 end do
1275 end do
1276 end do
1277
1278 if (nc - 1 /= ncyc) then
1279 call neko_error("The number of cyclic GLL points were " // &
1280 "not estimated correctly.")
1281 end if
1282
1283 if (neko_bcknd_device .eq. 1) then
1284 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, host_to_device, sync=.false.)
1285 call device_memcpy(this%R11, this%R11_d, ncyc, host_to_device, sync=.false.)
1286 call device_memcpy(this%R12, this%R12_d, ncyc, host_to_device, sync=.false.)
1287 end if
1288
1289 end subroutine coef_generate_cyclic_bc
1290
1291end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:886
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:408
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1062
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:1041
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:728
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1079
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:175
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:221
subroutine coef_generate_cyclic_bc(this)
Definition coef.f90:1227
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:1002
Definition comm.F90:1
type(mpi_comm), public neko_comm
MPI communicator.
Definition comm.F90:43
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:639
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:881
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:958
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:614
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:579
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
pure logical function, public index_is_on_facet(i, j, k, lx, ly, lz, facet)
Definition utils.f90:262
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:61
The function space for the SEM solution fields.
Definition space.f90:63