Neko 1.99.2
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only: space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
47 use mxm_wrapper, only : mxm
48 use device
50 use, intrinsic :: iso_c_binding
51 implicit none
52 private
53
56 type, public :: coef_t
58 real(kind=rp), allocatable :: g11(:,:,:,:)
60 real(kind=rp), allocatable :: g22(:,:,:,:)
62 real(kind=rp), allocatable :: g33(:,:,:,:)
64 real(kind=rp), allocatable :: g12(:,:,:,:)
66 real(kind=rp), allocatable :: g13(:,:,:,:)
68 real(kind=rp), allocatable :: g23(:,:,:,:)
69
70 real(kind=rp), allocatable :: mult(:,:,:,:)
75 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
76 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
77 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
81 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
82 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
83 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
84
85 real(kind=rp), allocatable :: h1(:,:,:,:)
86 real(kind=rp), allocatable :: h2(:,:,:,:)
87 logical :: ifh2
88
89 real(kind=rp), allocatable :: jac(:,:,:,:)
90 real(kind=rp), allocatable :: jacinv(:,:,:,:)
91 real(kind=rp), allocatable :: b(:,:,:,:)
92 real(kind=rp), allocatable :: binv(:,:,:,:)
93
94 real(kind=rp), allocatable :: area(:,:,:,:)
95 real(kind=rp), allocatable :: nx(:,:,:,:)
96 real(kind=rp), allocatable :: ny(:,:,:,:)
97 real(kind=rp), allocatable :: nz(:,:,:,:)
98 logical :: cyclic = .false.
99 integer, allocatable :: cyc_msk(:)
100 real(kind=rp), allocatable :: r11(:)
101 real(kind=rp), allocatable :: r12(:)
103
104 real(kind=rp) :: volume
105
106 type(space_t), pointer :: xh => null()
107 type(mesh_t), pointer :: msh => null()
108 type(dofmap_t), pointer :: dof => null()
109 type(gs_t), pointer :: gs_h=> null()
110
111 !
112 ! Device pointers (if present)
113 !
114
115 type(c_ptr) :: g11_d = c_null_ptr
116 type(c_ptr) :: g22_d = c_null_ptr
117 type(c_ptr) :: g33_d = c_null_ptr
118 type(c_ptr) :: g12_d = c_null_ptr
119 type(c_ptr) :: g13_d = c_null_ptr
120 type(c_ptr) :: g23_d = c_null_ptr
121 type(c_ptr) :: dxdr_d = c_null_ptr
122 type(c_ptr) :: dydr_d = c_null_ptr
123 type(c_ptr) :: dzdr_d = c_null_ptr
124 type(c_ptr) :: dxds_d = c_null_ptr
125 type(c_ptr) :: dyds_d = c_null_ptr
126 type(c_ptr) :: dzds_d = c_null_ptr
127 type(c_ptr) :: dxdt_d = c_null_ptr
128 type(c_ptr) :: dydt_d = c_null_ptr
129 type(c_ptr) :: dzdt_d = c_null_ptr
130 type(c_ptr) :: drdx_d = c_null_ptr
131 type(c_ptr) :: drdy_d = c_null_ptr
132 type(c_ptr) :: drdz_d = c_null_ptr
133 type(c_ptr) :: dsdx_d = c_null_ptr
134 type(c_ptr) :: dsdy_d = c_null_ptr
135 type(c_ptr) :: dsdz_d = c_null_ptr
136 type(c_ptr) :: dtdx_d = c_null_ptr
137 type(c_ptr) :: dtdy_d = c_null_ptr
138 type(c_ptr) :: dtdz_d = c_null_ptr
139 type(c_ptr) :: mult_d = c_null_ptr
140 type(c_ptr) :: h1_d = c_null_ptr
141 type(c_ptr) :: h2_d = c_null_ptr
142 type(c_ptr) :: jac_d = c_null_ptr
143 type(c_ptr) :: jacinv_d = c_null_ptr
144 type(c_ptr) :: b_d = c_null_ptr
145 type(c_ptr) :: binv_d = c_null_ptr
146 type(c_ptr) :: area_d = c_null_ptr
147 type(c_ptr) :: nx_d = c_null_ptr
148 type(c_ptr) :: ny_d = c_null_ptr
149 type(c_ptr) :: nz_d = c_null_ptr
150 type(c_ptr) :: cyc_msk_d = c_null_ptr
151 type(c_ptr) :: r11_d = c_null_ptr
152 type(c_ptr) :: r12_d = c_null_ptr
153
154
155
156 contains
157 procedure, private, pass(this) :: init_empty => coef_init_empty
158 procedure, private, pass(this) :: init_all => coef_init_all
159 procedure, pass(this) :: free => coef_free
160 procedure, pass(this) :: get_normal => coef_get_normal
161 procedure, pass(this) :: get_area => coef_get_area
162 generic :: init => init_empty, init_all
163 end type coef_t
164
165contains
166
168 subroutine coef_init_empty(this, Xh, msh)
169 class(coef_t), intent(inout) :: this
170 type(space_t), intent(inout), target :: Xh
171 type(mesh_t), intent(inout), target :: msh
172 integer :: n
173 call this%free()
174 this%msh => msh
175 this%Xh => xh
176
177 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
178 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
179 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
180
181 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
182 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
183 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
184
185 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
186 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
187 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
188
189
190 !
191 ! Setup device memory (if present)
192 !
193
194 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
195 if (neko_bcknd_device .eq. 1) then
196
197 call device_map(this%drdx, this%drdx_d, n)
198 call device_map(this%drdy, this%drdy_d, n)
199 call device_map(this%drdz, this%drdz_d, n)
200
201 call device_map(this%dsdx, this%dsdx_d, n)
202 call device_map(this%dsdy, this%dsdy_d, n)
203 call device_map(this%dsdz, this%dsdz_d, n)
204
205 call device_map(this%dtdx, this%dtdx_d, n)
206 call device_map(this%dtdy, this%dtdy_d, n)
207 call device_map(this%dtdz, this%dtdz_d, n)
208
209 end if
210
211 end subroutine coef_init_empty
212
214 subroutine coef_init_all(this, gs_h)
215 class(coef_t), intent(inout) :: this
216 type(gs_t), intent(inout), target :: gs_h
217 integer :: n, m, ncyc
218 call this%free()
219
220 this%msh => gs_h%dofmap%msh
221 this%Xh => gs_h%dofmap%Xh
222 this%dof => gs_h%dofmap
223 this%gs_h => gs_h
224
225 !
226 ! Allocate arrays for geometric data
227 !
229 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
230 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
231 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
232 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
233 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
234 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
235
236 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
237 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
238 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
239
240 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
241 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
242 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
243
244 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
246 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
247
248 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
250 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
251
252 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
254 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
255
256 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
257 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
258 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
259
260 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
262
263 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
264 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
265 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
266 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
267
268 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
269 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
270
271 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
272 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
273
274 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
275
276
277 !
278 ! Setup device memory (if present)
279 !
280
281 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
282 if (neko_bcknd_device .eq. 1) then
283 call device_map(this%G11, this%G11_d, n)
284 call device_map(this%G22, this%G22_d, n)
285 call device_map(this%G33, this%G33_d, n)
286 call device_map(this%G12, this%G12_d, n)
287 call device_map(this%G13, this%G13_d, n)
288 call device_map(this%G23, this%G23_d, n)
289
290 call device_map(this%dxdr, this%dxdr_d, n)
291 call device_map(this%dydr, this%dydr_d, n)
292 call device_map(this%dzdr, this%dzdr_d, n)
293
294 call device_map(this%dxds, this%dxds_d, n)
295 call device_map(this%dyds, this%dyds_d, n)
296 call device_map(this%dzds, this%dzds_d, n)
297
298 call device_map(this%dxdt, this%dxdt_d, n)
299 call device_map(this%dydt, this%dydt_d, n)
300 call device_map(this%dzdt, this%dzdt_d, n)
301
302 call device_map(this%drdx, this%drdx_d, n)
303 call device_map(this%drdy, this%drdy_d, n)
304 call device_map(this%drdz, this%drdz_d, n)
305
306 call device_map(this%dsdx, this%dsdx_d, n)
307 call device_map(this%dsdy, this%dsdy_d, n)
308 call device_map(this%dsdz, this%dsdz_d, n)
309
310 call device_map(this%dtdx, this%dtdx_d, n)
311 call device_map(this%dtdy, this%dtdy_d, n)
312 call device_map(this%dtdz, this%dtdz_d, n)
313
314 call device_map(this%mult, this%mult_d, n)
315 call device_map(this%h1, this%h1_d, n)
316 call device_map(this%h2, this%h2_d, n)
317
318 call device_map(this%jac, this%jac_d, n)
319 call device_map(this%jacinv, this%jacinv_d, n)
320 call device_map(this%B, this%B_d, n)
321 call device_map(this%Binv, this%Binv_d, n)
322
323 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
324
325 call device_map(this%area, this%area_d, m)
326 call device_map(this%nx, this%nx_d, m)
327 call device_map(this%ny, this%ny_d, m)
328 call device_map(this%nz, this%nz_d, m)
329
330 end if
331
332 call coef_generate_dxyzdrst(this)
333
334 call coef_generate_geo(this)
335
337
338 call coef_generate_mass(this)
339
340
341 ! This is a placeholder, just for now
342 ! We can probably find a prettier solution
343 if (neko_bcknd_device .eq. 1) then
344 call device_rone(this%h1_d, n)
345 call device_rone(this%h2_d, n)
346 call device_memcpy(this%h1, this%h1_d, n, &
347 device_to_host, sync=.false.)
348 call device_memcpy(this%h2, this%h2_d, n, &
349 device_to_host, sync=.false.)
350 else
351 call rone(this%h1,n)
352 call rone(this%h2,n)
353 end if
354
355 this%ifh2 = .false.
356
357 !
358 ! Set up multiplicity
359 !
360 if (neko_bcknd_device .eq. 1) then
361 call device_rone(this%mult_d, n)
362 else
363 call rone(this%mult, n)
364 end if
365
366 call gs_h%op(this%mult, n, gs_op_add)
367
368 if (neko_bcknd_device .eq. 1) then
369 call device_invcol1(this%mult_d, n)
370 call device_memcpy(this%mult, this%mult_d, n, &
371 device_to_host, sync=.true.)
372 else
373 call invcol1(this%mult, n)
374 end if
375
376 ncyc = this%msh%periodic%size * this%Xh%lx * this%Xh%lx
377 allocate(this%cyc_msk(0:ncyc))
378 allocate(this%R11(ncyc))
379 allocate(this%R12(ncyc))
380 if (neko_bcknd_device .eq. 1) then
381 call device_map(this%cyc_msk, this%cyc_msk_d, ncyc+1)
382 call device_map(this%R11, this%R11_d, ncyc)
383 call device_map(this%R12, this%R12_d, ncyc)
384 end if
385 call coef_generate_cyclic_bc(this)
386 end subroutine coef_init_all
387
389 subroutine coef_free(this)
390 class(coef_t), intent(inout) :: this
391
392 if (allocated(this%G11)) then
393 deallocate(this%G11)
394 end if
395
396 if (allocated(this%G22)) then
397 deallocate(this%G22)
398 end if
399
400 if (allocated(this%G33)) then
401 deallocate(this%G33)
402 end if
403
404 if (allocated(this%G12)) then
405 deallocate(this%G12)
406 end if
407
408 if (allocated(this%G13)) then
409 deallocate(this%G13)
410 end if
411
412 if (allocated(this%G23)) then
413 deallocate(this%G23)
414 end if
415
416 if (allocated(this%mult)) then
417 deallocate(this%mult)
418 end if
419
420 if (allocated(this%B)) then
421 deallocate(this%B)
422 end if
423
424 if (allocated(this%Binv)) then
425 deallocate(this%Binv)
426 end if
427
428 if(allocated(this%dxdr)) then
429 deallocate(this%dxdr)
430 end if
431
432 if(allocated(this%dxds)) then
433 deallocate(this%dxds)
434 end if
435
436 if(allocated(this%dxdt)) then
437 deallocate(this%dxdt)
438 end if
439
440 if(allocated(this%dydr)) then
441 deallocate(this%dydr)
442 end if
443
444 if(allocated(this%dyds)) then
445 deallocate(this%dyds)
446 end if
447
448 if(allocated(this%dydt)) then
449 deallocate(this%dydt)
450 end if
451
452 if(allocated(this%dzdr)) then
453 deallocate(this%dzdr)
454 end if
455
456 if(allocated(this%dzds)) then
457 deallocate(this%dzds)
458 end if
459
460 if(allocated(this%dzdt)) then
461 deallocate(this%dzdt)
462 end if
463
464 if(allocated(this%drdx)) then
465 deallocate(this%drdx)
466 end if
467
468 if(allocated(this%dsdx)) then
469 deallocate(this%dsdx)
470 end if
471
472 if(allocated(this%dtdx)) then
473 deallocate(this%dtdx)
474 end if
475
476 if(allocated(this%drdy)) then
477 deallocate(this%drdy)
478 end if
479
480 if(allocated(this%dsdy)) then
481 deallocate(this%dsdy)
482 end if
483
484 if(allocated(this%dtdy)) then
485 deallocate(this%dtdy)
486 end if
487
488 if(allocated(this%drdz)) then
489 deallocate(this%drdz)
490 end if
491
492 if(allocated(this%dsdz)) then
493 deallocate(this%dsdz)
494 end if
495
496 if(allocated(this%dtdz)) then
497 deallocate(this%dtdz)
498 end if
499
500 if(allocated(this%jac)) then
501 deallocate(this%jac)
502 end if
503
504 if(allocated(this%jacinv)) then
505 deallocate(this%jacinv)
506 end if
507
508 if(allocated(this%h1)) then
509 deallocate(this%h1)
510 end if
511
512 if(allocated(this%h2)) then
513 deallocate(this%h2)
514 end if
515
516 if (allocated(this%area)) then
517 deallocate(this%area)
518 end if
519
520 if (allocated(this%nx)) then
521 deallocate(this%nx)
522 end if
523
524 if (allocated(this%ny)) then
525 deallocate(this%ny)
526 end if
527
528 if (allocated(this%nz)) then
529 deallocate(this%nz)
530 end if
531
532 if (allocated(this%cyc_msk)) then
533 deallocate(this%cyc_msk)
534 end if
535
536 if (allocated(this%R11)) then
537 deallocate(this%R11)
538 end if
539
540 if (allocated(this%R12)) then
541 deallocate(this%R12)
542 end if
543
544
545 nullify(this%msh)
546 nullify(this%Xh)
547 nullify(this%dof)
548 nullify(this%gs_h)
549
550 !
551 ! Cleanup the device (if present)
552 !
553
554 if (c_associated(this%G11_d)) then
555 call device_free(this%G11_d)
556 end if
557
558 if (c_associated(this%G22_d)) then
559 call device_free(this%G22_d)
560 end if
561
562 if (c_associated(this%G33_d)) then
563 call device_free(this%G33_d)
564 end if
565
566 if (c_associated(this%G12_d)) then
567 call device_free(this%G12_d)
568 end if
569
570 if (c_associated(this%G13_d)) then
571 call device_free(this%G13_d)
572 end if
573
574 if (c_associated(this%G23_d)) then
575 call device_free(this%G23_d)
576 end if
577
578 if (c_associated(this%dxdr_d)) then
579 call device_free(this%dxdr_d)
580 end if
581
582 if (c_associated(this%dydr_d)) then
583 call device_free(this%dydr_d)
584 end if
585
586 if (c_associated(this%dzdr_d)) then
587 call device_free(this%dzdr_d)
588 end if
589
590 if (c_associated(this%dxds_d)) then
591 call device_free(this%dxds_d)
592 end if
593
594 if (c_associated(this%dyds_d)) then
595 call device_free(this%dyds_d)
596 end if
597
598 if (c_associated(this%dzds_d)) then
599 call device_free(this%dzds_d)
600 end if
601
602 if (c_associated(this%dxdt_d)) then
603 call device_free(this%dxdt_d)
604 end if
605
606 if (c_associated(this%dydt_d)) then
607 call device_free(this%dydt_d)
608 end if
609
610 if (c_associated(this%dzdt_d)) then
611 call device_free(this%dzdt_d)
612 end if
613
614 if (c_associated(this%drdx_d)) then
615 call device_free(this%drdx_d)
616 end if
617
618 if (c_associated(this%drdy_d)) then
619 call device_free(this%drdy_d)
620 end if
621
622 if (c_associated(this%drdz_d)) then
623 call device_free(this%drdz_d)
624 end if
625
626 if (c_associated(this%dsdx_d)) then
627 call device_free(this%dsdx_d)
628 end if
629
630 if (c_associated(this%dsdy_d)) then
631 call device_free(this%dsdy_d)
632 end if
633
634 if (c_associated(this%dsdz_d)) then
635 call device_free(this%dsdz_d)
636 end if
637
638 if (c_associated(this%dtdx_d)) then
639 call device_free(this%dtdx_d)
640 end if
641
642 if (c_associated(this%dtdy_d)) then
643 call device_free(this%dtdy_d)
644 end if
645
646 if (c_associated(this%dtdz_d)) then
647 call device_free(this%dtdz_d)
648 end if
649
650 if (c_associated(this%mult_d)) then
651 call device_free(this%mult_d)
652 end if
653
654 if (c_associated(this%h1_d)) then
655 call device_free(this%h1_d)
656 end if
657
658 if (c_associated(this%h2_d)) then
659 call device_free(this%h2_d)
660 end if
661
662 if (c_associated(this%jac_d)) then
663 call device_free(this%jac_d)
664 end if
665
666 if (c_associated(this%jacinv_d)) then
667 call device_free(this%jacinv_d)
668 end if
669
670 if (c_associated(this%B_d)) then
671 call device_free(this%B_d)
672 end if
673
674 if (c_associated(this%Binv_d)) then
675 call device_free(this%Binv_d)
676 end if
677
678 if (c_associated(this%area_d)) then
679 call device_free(this%area_d)
680 end if
681
682 if (c_associated(this%nx_d)) then
683 call device_free(this%nx_d)
684 end if
685
686 if (c_associated(this%ny_d)) then
687 call device_free(this%ny_d)
688 end if
689
690 if (c_associated(this%nz_d)) then
691 call device_free(this%nz_d)
692 end if
693
694 if (c_associated(this%cyc_msk_d)) then
695 call device_free(this%cyc_msk_d)
696 end if
697
698 if (c_associated(this%R11_d)) then
699 call device_free(this%R11_d)
700 end if
701
702 if (c_associated(this%R12_d)) then
703 call device_free(this%R12_d)
704 end if
705
706
707 end subroutine coef_free
708
710 type(coef_t), intent(inout) :: c
711 integer :: e,i,lxy,lyz,ntot
712
713 lxy=c%Xh%lx*c%Xh%ly
714 lyz=c%Xh%ly*c%Xh%lz
715 ntot = c%dof%size()
716
717 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
718 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
719 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
720 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
721 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
722 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
723 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
724 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
725 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
726 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
727 jacinv => c%jacinv, jac => c%jac)
728
729 if (neko_bcknd_device .eq. 1) then
730
731 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
732 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
733 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
734 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
735 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
736 c%Xh%lx, c%msh%nelv)
737
738 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, sync=.false.)
739 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, sync=.false.)
740 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, sync=.false.)
741 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, sync=.false.)
742 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, sync=.false.)
743 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, sync=.false.)
744 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, sync=.false.)
745 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, sync=.false.)
746 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, sync=.false.)
747 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, sync=.false.)
748 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, sync=.false.)
749 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, sync=.false.)
750 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, sync=.false.)
751 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, sync=.false.)
752 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, sync=.false.)
753 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, sync=.false.)
754 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, sync=.false.)
755 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, sync=.false.)
756 call device_memcpy(jac, c%jac_d, ntot, device_to_host, sync=.false.)
757 call device_memcpy(jacinv, c%jacinv_d, ntot, &
758 device_to_host, sync=.true.)
759
760 else
761 do e = 1, c%msh%nelv
762 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
763 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
764 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
765
766 do i = 1, lz
767 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
768 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
769 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
770 end do
771
772 ! We actually take 2d into account, wow, need to do that for the rest.
773 if(c%msh%gdim .eq. 3) then
774 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
775 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
776 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
777 else
778 call rzero(dxdt(1,1,1,e), lxy)
779 call rzero(dydt(1,1,1,e), lxy)
780 call rone(dzdt(1,1,1,e), lxy)
781 end if
782 end do
783
784 if (c%msh%gdim .eq. 2) then
785 call rzero (jac, ntot)
786 call addcol3 (jac, dxdr, dyds, ntot)
787 call subcol3 (jac, dxds, dydr, ntot)
788 call copy (drdx, dyds, ntot)
789 call copy (drdy, dxds, ntot)
790 call chsign (drdy, ntot)
791 call copy (dsdx, dydr, ntot)
792 call chsign (dsdx, ntot)
793 call copy (dsdy, dxdr, ntot)
794 call rzero (drdz, ntot)
795 call rzero (dsdz, ntot)
796 call rone (dtdz, ntot)
797 else
798
799 do i = 1, ntot
800 c%jac(i, 1, 1, 1) = 0.0_rp
801 end do
802
803 do i = 1, ntot
804 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
805 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
806
807 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
808 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
809
810 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
811 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
812 end do
813
814 do i = 1, ntot
815 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
816 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
817
818 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
819 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
820
821 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
822 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
823 end do
824
825 do i = 1, ntot
826 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
827 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
828
829 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
830 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
831
832 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
833 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
834 end do
835
836 do i = 1, ntot
837 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
838 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
839
840 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
841 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
842
843 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
844 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
845 end do
846
847 do i = 1, ntot
848 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
849 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
850
851 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
852 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
853
854 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
855 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
856 end do
857
858 end if
859 call invers2(jacinv, jac, ntot)
860 end if
861 end associate
862
863 end subroutine coef_generate_dxyzdrst
864
867 subroutine coef_generate_geo(c)
868 type(coef_t), intent(inout) :: c
869 integer :: e, i, lxyz, ntot
870
871 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
872 ntot = c%dof%size()
873
874 if (neko_bcknd_device .eq. 1) then
875
876 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
877 c%G22_d, c%G23_d, c%G33_d, &
878 c%drdx_d, c%drdy_d, c%drdz_d, &
879 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
880 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
881 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
882 c%Xh%lx, c%msh%gdim)
883
884 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
885 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
886 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
887 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
888 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
889 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
890
891 else
892 if(c%msh%gdim .eq. 2) then
893
894 do i = 1, ntot
895 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
896 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
897
898 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
899 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
900
901 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
902 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
903 end do
904
905 do i = 1, ntot
906 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
907 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
908 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
909 c%G33(i, 1, 1, 1) = 0.0_rp
910 c%G13(i, 1, 1, 1) = 0.0_rp
911 c%G23(i, 1, 1, 1) = 0.0_rp
912 end do
913
914 do concurrent(e = 1:c%msh%nelv)
915 do concurrent(i = 1:lxyz)
916 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
917 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
918 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
919 end do
920 end do
921
922 else
923
924 do i = 1, ntot
925 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
926 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
927 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
928
929 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
930 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
931 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
932
933 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
934 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
935 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
936 end do
937
938 do i = 1, ntot
939 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
940 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
941 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
942 end do
943
944 do i = 1, ntot
945 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
946 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
947 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
948
949 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
950 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
951 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
952
953 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
954 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
955 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
956 end do
957
958 do i = 1, ntot
959 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
960 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
961 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
962 end do
963
964 do concurrent(e = 1:c%msh%nelv)
965 do concurrent(i = 1:lxyz)
966 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
967 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
968 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
969
970 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
971 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
972 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
973 end do
974 end do
975
976 end if
977 end if
978
979 end subroutine coef_generate_geo
980
983 subroutine coef_generate_mass(c)
984 type(coef_t), intent(inout) :: c
985 integer :: e, i, lxyz, ntot
986
987 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
988 ntot = c%dof%size()
989
991 do concurrent(e = 1:c%msh%nelv)
992 ! Here we need to handle things differently for axis symmetric elements
993 do concurrent(i = 1:lxyz)
994 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
995 c%Binv(i,1,1,e) = c%B(i,1,1,e)
996 end do
997 end do
998
999 if (neko_bcknd_device .eq. 1) then
1000 call device_memcpy(c%B, c%B_d, ntot, host_to_device, sync=.false.)
1001 call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, sync=.false.)
1002 end if
1003
1004 call c%gs_h%op(c%Binv, ntot, gs_op_add)
1005
1006 if (neko_bcknd_device .eq. 1) then
1007 call device_invcol1(c%Binv_d, ntot)
1008 call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, sync=.true.)
1009 else
1010 call invcol1(c%Binv, ntot)
1011 end if
1012
1014 if (neko_bcknd_device .eq. 1) then
1015 c%volume = device_glsum(c%B_d, ntot)
1016 else
1017 c%volume = glsum(c%B, ntot)
1018 end if
1019
1020 end subroutine coef_generate_mass
1021
1022 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
1023 class(coef_t), intent(in) :: this
1024 integer, intent(in) :: i, j, k, e, facet
1025 real(kind=rp) :: normal(3)
1026
1027 select case (facet)
1028 case(1,2)
1029 normal(1) = this%nx(j, k, facet, e)
1030 normal(2) = this%ny(j, k, facet, e)
1031 normal(3) = this%nz(j, k, facet, e)
1032 case(3,4)
1033 normal(1) = this%nx(i, k, facet, e)
1034 normal(2) = this%ny(i, k, facet, e)
1035 normal(3) = this%nz(i, k, facet, e)
1036 case(5,6)
1037 normal(1) = this%nx(i, j, facet, e)
1038 normal(2) = this%ny(i, j, facet, e)
1039 normal(3) = this%nz(i, j, facet, e)
1040 end select
1041 end function coef_get_normal
1042
1043 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1044 class(coef_t), intent(in) :: this
1045 integer, intent(in) :: i, j, k, e, facet
1046 real(kind=rp) :: area
1047
1048 select case (facet)
1049 case(1,2)
1050 area = this%area(j, k, facet, e)
1051 case(3,4)
1052 area = this%area(i, k, facet, e)
1053 case(5,6)
1054 area = this%area(i, j, facet, e)
1055 end select
1056 end function coef_get_area
1057
1058
1061 type(coef_t), intent(inout) :: coef
1062 real(kind=rp), allocatable :: a(:,:,:,:)
1063 real(kind=rp), allocatable :: b(:,:,:,:)
1064 real(kind=rp), allocatable :: c(:,:,:,:)
1065 real(kind=rp), allocatable :: dot(:,:,:,:)
1066 integer :: n, e, i, j, k, lx
1067 real(kind=rp) :: weight, len
1068 n = coef%dof%size()
1069 lx = coef%Xh%lx
1070
1071 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1072 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1073 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1074 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1075
1076 ! ds x dt
1077 do i = 1, n
1078 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1079 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1080
1081 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1082 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1083
1084 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1085 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1086 end do
1087
1088 do i = 1, n
1089 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1090 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1091 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1092 end do
1093
1094 do concurrent(e = 1:coef%msh%nelv)
1095 do concurrent(k = 1:coef%Xh%lx)
1096 do concurrent(j = 1:coef%Xh%lx)
1097 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1098 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1099 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1100 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1101 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1102 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1103 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1104 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1105 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1106 end do
1107 end do
1108 end do
1109
1110 ! dr x dt
1111 do i = 1, n
1112 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1113 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1114
1115 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1116 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1117
1118 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1119 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1120 end do
1121
1122 do i = 1, n
1123 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1124 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1125 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1126 end do
1127
1128 do concurrent(e = 1:coef%msh%nelv)
1129 do concurrent(k = 1:coef%Xh%lx)
1130 do concurrent(j = 1:coef%Xh%lx)
1131 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1132 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1133 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1134 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1135 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1136 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1137 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1138 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1139 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1140 end do
1141 end do
1142 end do
1143
1144 ! dr x ds
1145 do i = 1, n
1146 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1147 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1148
1149 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1150 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1151
1152 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1153 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1154 end do
1155
1156 do i = 1, n
1157 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1158 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1159 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1160 end do
1161
1162 do concurrent(e = 1:coef%msh%nelv)
1163 do concurrent(k = 1:coef%Xh%lx)
1164 do concurrent(j = 1:coef%Xh%lx)
1165 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1166 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1167 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1168 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1169 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1170 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1171 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1172 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1173 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1174 end do
1175 end do
1176 end do
1177
1178 ! Normalize
1179 do j = 1, size(coef%nz)
1180 len = sqrt(coef%nx(j,1,1,1)**2 + &
1181 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1182 if (len .gt. neko_eps) then
1183 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1184 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1185 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1186 end if
1187 end do
1188
1189 deallocate(dot)
1190 deallocate(c)
1191 deallocate(b)
1192 deallocate(a)
1194 if (neko_bcknd_device .eq. 1) then
1195 n = size(coef%area)
1196 call device_memcpy(coef%area, coef%area_d, n, &
1197 host_to_device, sync=.false.)
1198 call device_memcpy(coef%nx, coef%nx_d, n, &
1199 host_to_device, sync=.false.)
1200 call device_memcpy(coef%ny, coef%ny_d, n, &
1201 host_to_device, sync=.false.)
1202 call device_memcpy(coef%nz, coef%nz_d, n, &
1203 host_to_device, sync=.false.)
1204 end if
1205
1206 end subroutine coef_generate_area_and_normal
1207
1208
1210 type(coef_t), intent(inout) :: coef
1211 real(kind=rp) :: un(3), len, d
1212 integer :: lx, ly, lz, ntot, np
1213 integer :: i, j, k, pf, pe, n, nc, ncyc
1214
1215 ntot = coef%dof%size()
1216 np = coef%msh%periodic%size
1217 lx = coef%Xh%lx
1218 ly = coef%Xh%ly
1219 lz = coef%Xh%lz
1220 ncyc = coef%msh%periodic%size * coef%Xh%lx * coef%Xh%lx
1221 nc = 1
1222 coef%cyc_msk(0) = ncyc + 1
1223
1224 do n = 1, np
1225 pf = coef%msh%periodic%facet_el(n)%x(1)
1226 pe = coef%msh%periodic%facet_el(n)%x(2)
1227 do k = 1, lz
1228 do j = 1, ly
1229 do i = 1, lx
1230 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1231 un = coef%get_normal(i, j, k, pe, pf)
1232 len = sqrt(un(1) * un(1) + un(2) * un(2))
1233 d = coef%dof%y(i, j, k, pe) * un(1) - coef%dof%x(i, j, k, pe) * un(2)
1234
1235 coef%cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1236 coef%R11(nc) = un(1)/len * sign(1.0_rp, d)
1237 coef%R12(nc) = un(2)/len * sign(1.0_rp, d)
1238
1239 nc = nc + 1
1240 end if
1241 end do
1242 end do
1243 end do
1244 end do
1245
1246 if (neko_bcknd_device .eq. 1) then
1247 call device_memcpy(coef%cyc_msk, coef%cyc_msk_d, ncyc+1, host_to_device, sync=.false.)
1248 call device_memcpy(coef%R11, coef%R11_d, ncyc, host_to_device, sync=.false.)
1249 call device_memcpy(coef%R12, coef%R12_d, ncyc, host_to_device, sync=.false.)
1250 end if
1251
1252 end subroutine coef_generate_cyclic_bc
1253
1254
1255end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:868
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:390
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1044
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:1023
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:710
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1061
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:169
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:215
subroutine coef_generate_cyclic_bc(coef)
Definition coef.f90:1210
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:984
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:639
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:881
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:958
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:614
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:579
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
pure logical function, public index_is_on_facet(i, j, k, lx, ly, lz, facet)
Definition utils.f90:262
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:56
The function space for the SEM solution fields.
Definition space.f90:63