Neko 1.99.3
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only: space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
48 use mxm_wrapper, only : mxm
49 use device
52 use, intrinsic :: iso_c_binding
53 implicit none
54 private
55
58 type, public :: coef_t
60 real(kind=rp), allocatable :: g11(:,:,:,:)
62 real(kind=rp), allocatable :: g22(:,:,:,:)
64 real(kind=rp), allocatable :: g33(:,:,:,:)
66 real(kind=rp), allocatable :: g12(:,:,:,:)
68 real(kind=rp), allocatable :: g13(:,:,:,:)
70 real(kind=rp), allocatable :: g23(:,:,:,:)
71
72 real(kind=rp), allocatable :: mult(:,:,:,:)
77 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
78 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
79 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
83 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
84 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
85 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
86
87 real(kind=rp), allocatable :: h1(:,:,:,:)
88 real(kind=rp), allocatable :: h2(:,:,:,:)
89 logical :: ifh2
90
91 real(kind=rp), allocatable :: jac(:,:,:,:)
92 real(kind=rp), allocatable :: jacinv(:,:,:,:)
93 real(kind=rp), allocatable :: b(:,:,:,:)
94 real(kind=rp), allocatable :: binv(:,:,:,:)
95
96 real(kind=rp), allocatable :: area(:,:,:,:)
97 real(kind=rp), allocatable :: nx(:,:,:,:)
98 real(kind=rp), allocatable :: ny(:,:,:,:)
99 real(kind=rp), allocatable :: nz(:,:,:,:)
100 logical :: cyclic = .false.
101 integer, allocatable :: cyc_msk(:)
102 real(kind=rp), allocatable :: r11(:)
103 real(kind=rp), allocatable :: r12(:)
105
106 real(kind=rp) :: volume
107
108 type(space_t), pointer :: xh => null()
109 type(mesh_t), pointer :: msh => null()
110 type(dofmap_t), pointer :: dof => null()
111 type(gs_t), pointer :: gs_h=> null()
112
113 !
114 ! Device pointers (if present)
115 !
116
117 type(c_ptr) :: g11_d = c_null_ptr
118 type(c_ptr) :: g22_d = c_null_ptr
119 type(c_ptr) :: g33_d = c_null_ptr
120 type(c_ptr) :: g12_d = c_null_ptr
121 type(c_ptr) :: g13_d = c_null_ptr
122 type(c_ptr) :: g23_d = c_null_ptr
123 type(c_ptr) :: dxdr_d = c_null_ptr
124 type(c_ptr) :: dydr_d = c_null_ptr
125 type(c_ptr) :: dzdr_d = c_null_ptr
126 type(c_ptr) :: dxds_d = c_null_ptr
127 type(c_ptr) :: dyds_d = c_null_ptr
128 type(c_ptr) :: dzds_d = c_null_ptr
129 type(c_ptr) :: dxdt_d = c_null_ptr
130 type(c_ptr) :: dydt_d = c_null_ptr
131 type(c_ptr) :: dzdt_d = c_null_ptr
132 type(c_ptr) :: drdx_d = c_null_ptr
133 type(c_ptr) :: drdy_d = c_null_ptr
134 type(c_ptr) :: drdz_d = c_null_ptr
135 type(c_ptr) :: dsdx_d = c_null_ptr
136 type(c_ptr) :: dsdy_d = c_null_ptr
137 type(c_ptr) :: dsdz_d = c_null_ptr
138 type(c_ptr) :: dtdx_d = c_null_ptr
139 type(c_ptr) :: dtdy_d = c_null_ptr
140 type(c_ptr) :: dtdz_d = c_null_ptr
141 type(c_ptr) :: mult_d = c_null_ptr
142 type(c_ptr) :: h1_d = c_null_ptr
143 type(c_ptr) :: h2_d = c_null_ptr
144 type(c_ptr) :: jac_d = c_null_ptr
145 type(c_ptr) :: jacinv_d = c_null_ptr
146 type(c_ptr) :: b_d = c_null_ptr
147 type(c_ptr) :: binv_d = c_null_ptr
148 type(c_ptr) :: area_d = c_null_ptr
149 type(c_ptr) :: nx_d = c_null_ptr
150 type(c_ptr) :: ny_d = c_null_ptr
151 type(c_ptr) :: nz_d = c_null_ptr
152 type(c_ptr) :: cyc_msk_d = c_null_ptr
153 type(c_ptr) :: r11_d = c_null_ptr
154 type(c_ptr) :: r12_d = c_null_ptr
155
156
157
158 contains
159 procedure, private, pass(this) :: init_empty => coef_init_empty
160 procedure, private, pass(this) :: init_all => coef_init_all
161 procedure, pass(this) :: free => coef_free
162 procedure, pass(this) :: get_normal => coef_get_normal
163 procedure, pass(this) :: get_area => coef_get_area
164 procedure, pass(this) :: check_cyclic => coef_check_cyclic
165 generic :: init => init_empty, init_all
166 end type coef_t
167
168contains
169
171 subroutine coef_init_empty(this, Xh, msh)
172 class(coef_t), intent(inout) :: this
173 type(space_t), intent(inout), target :: Xh
174 type(mesh_t), intent(inout), target :: msh
175 integer :: n
176 call this%free()
177 this%msh => msh
178 this%Xh => xh
179
180 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
181 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
182 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
183
184 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
185 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
186 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
187
188 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
189 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
190 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
191
192
193 !
194 ! Setup device memory (if present)
195 !
196
197 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
198 if (neko_bcknd_device .eq. 1) then
199
200 call device_map(this%drdx, this%drdx_d, n)
201 call device_map(this%drdy, this%drdy_d, n)
202 call device_map(this%drdz, this%drdz_d, n)
203
204 call device_map(this%dsdx, this%dsdx_d, n)
205 call device_map(this%dsdy, this%dsdy_d, n)
206 call device_map(this%dsdz, this%dsdz_d, n)
207
208 call device_map(this%dtdx, this%dtdx_d, n)
209 call device_map(this%dtdy, this%dtdy_d, n)
210 call device_map(this%dtdz, this%dtdz_d, n)
211
212 end if
213
214 end subroutine coef_init_empty
215
217 subroutine coef_init_all(this, gs_h)
218 class(coef_t), intent(inout) :: this
219 type(gs_t), intent(inout), target :: gs_h
220 integer :: n, m, ncyc
221 call this%free()
222
223 this%msh => gs_h%dofmap%msh
224 this%Xh => gs_h%dofmap%Xh
225 this%dof => gs_h%dofmap
226 this%gs_h => gs_h
227
228 !
229 ! Allocate arrays for geometric data
230 !
232 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
233 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
234 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
235 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
236 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
237 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
238
239 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
240 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
241 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
242
243 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
244 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
246
247 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
250
251 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
254
255 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
256 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
257 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
258
259 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
262
263 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
265
266 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
267 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
268 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
269 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
270
271 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
272 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
273
274 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
275 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
276
277 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
278
279
280 !
281 ! Setup device memory (if present)
282 !
283
284 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
285 if (neko_bcknd_device .eq. 1) then
286 call device_map(this%G11, this%G11_d, n)
287 call device_map(this%G22, this%G22_d, n)
288 call device_map(this%G33, this%G33_d, n)
289 call device_map(this%G12, this%G12_d, n)
290 call device_map(this%G13, this%G13_d, n)
291 call device_map(this%G23, this%G23_d, n)
292
293 call device_map(this%dxdr, this%dxdr_d, n)
294 call device_map(this%dydr, this%dydr_d, n)
295 call device_map(this%dzdr, this%dzdr_d, n)
296
297 call device_map(this%dxds, this%dxds_d, n)
298 call device_map(this%dyds, this%dyds_d, n)
299 call device_map(this%dzds, this%dzds_d, n)
300
301 call device_map(this%dxdt, this%dxdt_d, n)
302 call device_map(this%dydt, this%dydt_d, n)
303 call device_map(this%dzdt, this%dzdt_d, n)
304
305 call device_map(this%drdx, this%drdx_d, n)
306 call device_map(this%drdy, this%drdy_d, n)
307 call device_map(this%drdz, this%drdz_d, n)
308
309 call device_map(this%dsdx, this%dsdx_d, n)
310 call device_map(this%dsdy, this%dsdy_d, n)
311 call device_map(this%dsdz, this%dsdz_d, n)
312
313 call device_map(this%dtdx, this%dtdx_d, n)
314 call device_map(this%dtdy, this%dtdy_d, n)
315 call device_map(this%dtdz, this%dtdz_d, n)
316
317 call device_map(this%mult, this%mult_d, n)
318 call device_map(this%h1, this%h1_d, n)
319 call device_map(this%h2, this%h2_d, n)
320
321 call device_map(this%jac, this%jac_d, n)
322 call device_map(this%jacinv, this%jacinv_d, n)
323 call device_map(this%B, this%B_d, n)
324 call device_map(this%Binv, this%Binv_d, n)
325
326 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
327
328 call device_map(this%area, this%area_d, m)
329 call device_map(this%nx, this%nx_d, m)
330 call device_map(this%ny, this%ny_d, m)
331 call device_map(this%nz, this%nz_d, m)
332
333 end if
334
335 call coef_generate_dxyzdrst(this)
336
337 call coef_generate_geo(this)
338
340
341 call coef_generate_mass(this)
342
343
344 ! This is a placeholder, just for now
345 ! We can probably find a prettier solution
346 if (neko_bcknd_device .eq. 1) then
347 call device_rone(this%h1_d, n)
348 call device_rone(this%h2_d, n)
349 call device_memcpy(this%h1, this%h1_d, n, &
350 device_to_host, sync=.false.)
351 call device_memcpy(this%h2, this%h2_d, n, &
352 device_to_host, sync=.false.)
353 else
354 call rone(this%h1,n)
355 call rone(this%h2,n)
356 end if
357
358 this%ifh2 = .false.
359
360 !
361 ! Set up multiplicity
362 !
363 if (neko_bcknd_device .eq. 1) then
364 call device_rone(this%mult_d, n)
365 else
366 call rone(this%mult, n)
367 end if
368
369 call gs_h%op(this%mult, n, gs_op_add)
370
371 if (neko_bcknd_device .eq. 1) then
372 call device_invcol1(this%mult_d, n)
373 call device_memcpy(this%mult, this%mult_d, n, &
374 device_to_host, sync=.true.)
375 else
376 call invcol1(this%mult, n)
377 end if
378
379 ncyc = this%msh%periodic%size * this%Xh%lx * this%Xh%lx
380 allocate(this%cyc_msk(0:ncyc))
381 this%cyc_msk(0) = ncyc + 1
382 if (ncyc .gt. 0) then
383 allocate(this%R11(ncyc))
384 allocate(this%R12(ncyc))
385
387 call rone(this%R11, ncyc)
388 call rzero(this%R12, ncyc)
389
390 if (neko_bcknd_device .eq. 1) then
391 call device_map(this%cyc_msk, this%cyc_msk_d, ncyc+1)
392 call device_map(this%R11, this%R11_d, ncyc)
393 call device_map(this%R12, this%R12_d, ncyc)
394
395 call device_memcpy(this%cyc_msk, this%cyc_msk_d, ncyc+1, host_to_device, sync=.false.)
396 call device_memcpy(this%R11, this%R11_d, ncyc, host_to_device, sync=.false.)
397 call device_memcpy(this%R12, this%R12_d, ncyc, host_to_device, sync=.false.)
398 end if
399
400 end if
401 end subroutine coef_init_all
402
404 subroutine coef_free(this)
405 class(coef_t), intent(inout) :: this
406
407 if (allocated(this%G11)) then
408 deallocate(this%G11)
409 end if
410
411 if (allocated(this%G22)) then
412 deallocate(this%G22)
413 end if
414
415 if (allocated(this%G33)) then
416 deallocate(this%G33)
417 end if
418
419 if (allocated(this%G12)) then
420 deallocate(this%G12)
421 end if
422
423 if (allocated(this%G13)) then
424 deallocate(this%G13)
425 end if
426
427 if (allocated(this%G23)) then
428 deallocate(this%G23)
429 end if
430
431 if (allocated(this%mult)) then
432 deallocate(this%mult)
433 end if
434
435 if (allocated(this%B)) then
436 deallocate(this%B)
437 end if
438
439 if (allocated(this%Binv)) then
440 deallocate(this%Binv)
441 end if
442
443 if(allocated(this%dxdr)) then
444 deallocate(this%dxdr)
445 end if
446
447 if(allocated(this%dxds)) then
448 deallocate(this%dxds)
449 end if
450
451 if(allocated(this%dxdt)) then
452 deallocate(this%dxdt)
453 end if
454
455 if(allocated(this%dydr)) then
456 deallocate(this%dydr)
457 end if
458
459 if(allocated(this%dyds)) then
460 deallocate(this%dyds)
461 end if
462
463 if(allocated(this%dydt)) then
464 deallocate(this%dydt)
465 end if
466
467 if(allocated(this%dzdr)) then
468 deallocate(this%dzdr)
469 end if
470
471 if(allocated(this%dzds)) then
472 deallocate(this%dzds)
473 end if
474
475 if(allocated(this%dzdt)) then
476 deallocate(this%dzdt)
477 end if
478
479 if(allocated(this%drdx)) then
480 deallocate(this%drdx)
481 end if
482
483 if(allocated(this%dsdx)) then
484 deallocate(this%dsdx)
485 end if
486
487 if(allocated(this%dtdx)) then
488 deallocate(this%dtdx)
489 end if
490
491 if(allocated(this%drdy)) then
492 deallocate(this%drdy)
493 end if
494
495 if(allocated(this%dsdy)) then
496 deallocate(this%dsdy)
497 end if
498
499 if(allocated(this%dtdy)) then
500 deallocate(this%dtdy)
501 end if
502
503 if(allocated(this%drdz)) then
504 deallocate(this%drdz)
505 end if
506
507 if(allocated(this%dsdz)) then
508 deallocate(this%dsdz)
509 end if
510
511 if(allocated(this%dtdz)) then
512 deallocate(this%dtdz)
513 end if
514
515 if(allocated(this%jac)) then
516 deallocate(this%jac)
517 end if
518
519 if(allocated(this%jacinv)) then
520 deallocate(this%jacinv)
521 end if
522
523 if(allocated(this%h1)) then
524 deallocate(this%h1)
525 end if
526
527 if(allocated(this%h2)) then
528 deallocate(this%h2)
529 end if
530
531 if (allocated(this%area)) then
532 deallocate(this%area)
533 end if
534
535 if (allocated(this%nx)) then
536 deallocate(this%nx)
537 end if
538
539 if (allocated(this%ny)) then
540 deallocate(this%ny)
541 end if
542
543 if (allocated(this%nz)) then
544 deallocate(this%nz)
545 end if
546
547 if (allocated(this%cyc_msk)) then
548 deallocate(this%cyc_msk)
549 end if
550
551 if (allocated(this%R11)) then
552 deallocate(this%R11)
553 end if
554
555 if (allocated(this%R12)) then
556 deallocate(this%R12)
557 end if
558
559
560 nullify(this%msh)
561 nullify(this%Xh)
562 nullify(this%dof)
563 nullify(this%gs_h)
564
565 !
566 ! Cleanup the device (if present)
567 !
568
569 if (c_associated(this%G11_d)) then
570 call device_free(this%G11_d)
571 end if
572
573 if (c_associated(this%G22_d)) then
574 call device_free(this%G22_d)
575 end if
576
577 if (c_associated(this%G33_d)) then
578 call device_free(this%G33_d)
579 end if
580
581 if (c_associated(this%G12_d)) then
582 call device_free(this%G12_d)
583 end if
584
585 if (c_associated(this%G13_d)) then
586 call device_free(this%G13_d)
587 end if
588
589 if (c_associated(this%G23_d)) then
590 call device_free(this%G23_d)
591 end if
592
593 if (c_associated(this%dxdr_d)) then
594 call device_free(this%dxdr_d)
595 end if
596
597 if (c_associated(this%dydr_d)) then
598 call device_free(this%dydr_d)
599 end if
600
601 if (c_associated(this%dzdr_d)) then
602 call device_free(this%dzdr_d)
603 end if
604
605 if (c_associated(this%dxds_d)) then
606 call device_free(this%dxds_d)
607 end if
608
609 if (c_associated(this%dyds_d)) then
610 call device_free(this%dyds_d)
611 end if
612
613 if (c_associated(this%dzds_d)) then
614 call device_free(this%dzds_d)
615 end if
616
617 if (c_associated(this%dxdt_d)) then
618 call device_free(this%dxdt_d)
619 end if
620
621 if (c_associated(this%dydt_d)) then
622 call device_free(this%dydt_d)
623 end if
624
625 if (c_associated(this%dzdt_d)) then
626 call device_free(this%dzdt_d)
627 end if
628
629 if (c_associated(this%drdx_d)) then
630 call device_free(this%drdx_d)
631 end if
632
633 if (c_associated(this%drdy_d)) then
634 call device_free(this%drdy_d)
635 end if
636
637 if (c_associated(this%drdz_d)) then
638 call device_free(this%drdz_d)
639 end if
640
641 if (c_associated(this%dsdx_d)) then
642 call device_free(this%dsdx_d)
643 end if
644
645 if (c_associated(this%dsdy_d)) then
646 call device_free(this%dsdy_d)
647 end if
648
649 if (c_associated(this%dsdz_d)) then
650 call device_free(this%dsdz_d)
651 end if
652
653 if (c_associated(this%dtdx_d)) then
654 call device_free(this%dtdx_d)
655 end if
656
657 if (c_associated(this%dtdy_d)) then
658 call device_free(this%dtdy_d)
659 end if
660
661 if (c_associated(this%dtdz_d)) then
662 call device_free(this%dtdz_d)
663 end if
664
665 if (c_associated(this%mult_d)) then
666 call device_free(this%mult_d)
667 end if
668
669 if (c_associated(this%h1_d)) then
670 call device_free(this%h1_d)
671 end if
672
673 if (c_associated(this%h2_d)) then
674 call device_free(this%h2_d)
675 end if
676
677 if (c_associated(this%jac_d)) then
678 call device_free(this%jac_d)
679 end if
680
681 if (c_associated(this%jacinv_d)) then
682 call device_free(this%jacinv_d)
683 end if
684
685 if (c_associated(this%B_d)) then
686 call device_free(this%B_d)
687 end if
688
689 if (c_associated(this%Binv_d)) then
690 call device_free(this%Binv_d)
691 end if
692
693 if (c_associated(this%area_d)) then
694 call device_free(this%area_d)
695 end if
696
697 if (c_associated(this%nx_d)) then
698 call device_free(this%nx_d)
699 end if
700
701 if (c_associated(this%ny_d)) then
702 call device_free(this%ny_d)
703 end if
704
705 if (c_associated(this%nz_d)) then
706 call device_free(this%nz_d)
707 end if
708
709 if (c_associated(this%cyc_msk_d)) then
710 call device_free(this%cyc_msk_d)
711 end if
712
713 if (c_associated(this%R11_d)) then
714 call device_free(this%R11_d)
715 end if
716
717 if (c_associated(this%R12_d)) then
718 call device_free(this%R12_d)
719 end if
720
721
722 end subroutine coef_free
723
725 type(coef_t), intent(inout) :: c
726 integer :: e,i,lxy,lyz,ntot
727
728 lxy=c%Xh%lx*c%Xh%ly
729 lyz=c%Xh%ly*c%Xh%lz
730 ntot = c%dof%size()
731
732 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
733 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
734 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
735 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
736 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
737 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
738 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
739 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
740 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
741 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
742 jacinv => c%jacinv, jac => c%jac)
743
744 if (neko_bcknd_device .eq. 1) then
745
746 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
747 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
748 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
749 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
750 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
751 c%Xh%lx, c%msh%nelv)
752
753 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, sync=.false.)
754 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, sync=.false.)
755 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, sync=.false.)
756 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, sync=.false.)
757 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, sync=.false.)
758 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, sync=.false.)
759 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, sync=.false.)
760 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, sync=.false.)
761 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, sync=.false.)
762 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, sync=.false.)
763 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, sync=.false.)
764 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, sync=.false.)
765 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, sync=.false.)
766 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, sync=.false.)
767 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, sync=.false.)
768 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, sync=.false.)
769 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, sync=.false.)
770 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, sync=.false.)
771 call device_memcpy(jac, c%jac_d, ntot, device_to_host, sync=.false.)
772 call device_memcpy(jacinv, c%jacinv_d, ntot, &
773 device_to_host, sync=.true.)
774
775 else
776 do e = 1, c%msh%nelv
777 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
778 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
779 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
780
781 do i = 1, lz
782 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
783 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
784 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
785 end do
786
787 ! We actually take 2d into account, wow, need to do that for the rest.
788 if(c%msh%gdim .eq. 3) then
789 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
790 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
791 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
792 else
793 call rzero(dxdt(1,1,1,e), lxy)
794 call rzero(dydt(1,1,1,e), lxy)
795 call rone(dzdt(1,1,1,e), lxy)
796 end if
797 end do
798
799 if (c%msh%gdim .eq. 2) then
800 call rzero (jac, ntot)
801 call addcol3 (jac, dxdr, dyds, ntot)
802 call subcol3 (jac, dxds, dydr, ntot)
803 call copy (drdx, dyds, ntot)
804 call copy (drdy, dxds, ntot)
805 call chsign (drdy, ntot)
806 call copy (dsdx, dydr, ntot)
807 call chsign (dsdx, ntot)
808 call copy (dsdy, dxdr, ntot)
809 call rzero (drdz, ntot)
810 call rzero (dsdz, ntot)
811 call rone (dtdz, ntot)
812 else
813
814 do i = 1, ntot
815 c%jac(i, 1, 1, 1) = 0.0_rp
816 end do
817
818 do i = 1, ntot
819 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
820 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
821
822 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
823 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
824
825 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
826 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
827 end do
828
829 do i = 1, ntot
830 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
831 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
832
833 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
834 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
835
836 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
837 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
838 end do
839
840 do i = 1, ntot
841 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
842 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
843
844 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
845 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
846
847 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
848 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
849 end do
850
851 do i = 1, ntot
852 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
853 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
854
855 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
856 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
857
858 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
859 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
860 end do
861
862 do i = 1, ntot
863 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
864 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
865
866 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
867 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
868
869 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
870 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
871 end do
872
873 end if
874 call invers2(jacinv, jac, ntot)
875 end if
876 end associate
877
878 end subroutine coef_generate_dxyzdrst
879
882 subroutine coef_generate_geo(c)
883 type(coef_t), intent(inout) :: c
884 integer :: e, i, lxyz, ntot
885
886 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
887 ntot = c%dof%size()
888
889 if (neko_bcknd_device .eq. 1) then
890
891 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
892 c%G22_d, c%G23_d, c%G33_d, &
893 c%drdx_d, c%drdy_d, c%drdz_d, &
894 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
895 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
896 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
897 c%Xh%lx, c%msh%gdim)
898
899 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
900 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
901 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
902 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
903 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
904 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
905
906 else
907 if(c%msh%gdim .eq. 2) then
908
909 do i = 1, ntot
910 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
911 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
912
913 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
914 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
915
916 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
917 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
918 end do
919
920 do i = 1, ntot
921 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
922 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
923 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
924 c%G33(i, 1, 1, 1) = 0.0_rp
925 c%G13(i, 1, 1, 1) = 0.0_rp
926 c%G23(i, 1, 1, 1) = 0.0_rp
927 end do
928
929 do concurrent(e = 1:c%msh%nelv)
930 do concurrent(i = 1:lxyz)
931 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
932 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
933 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
934 end do
935 end do
936
937 else
938
939 do i = 1, ntot
940 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
941 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
942 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
943
944 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
945 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
946 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
947
948 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
949 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
950 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
951 end do
952
953 do i = 1, ntot
954 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
955 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
956 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
957 end do
958
959 do i = 1, ntot
960 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
961 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
962 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
963
964 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
965 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
966 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
967
968 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
969 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
970 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
971 end do
972
973 do i = 1, ntot
974 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
975 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
976 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
977 end do
978
979 do concurrent(e = 1:c%msh%nelv)
980 do concurrent(i = 1:lxyz)
981 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
982 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
983 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
984
985 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
986 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
987 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
988 end do
989 end do
990
991 end if
992 end if
993
994 end subroutine coef_generate_geo
995
998 subroutine coef_generate_mass(c)
999 type(coef_t), intent(inout) :: c
1000 integer :: e, i, lxyz, ntot
1001
1002 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
1003 ntot = c%dof%size()
1004
1006 do concurrent(e = 1:c%msh%nelv)
1007 ! Here we need to handle things differently for axis symmetric elements
1008 do concurrent(i = 1:lxyz)
1009 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
1010 c%Binv(i,1,1,e) = c%B(i,1,1,e)
1011 end do
1012 end do
1013
1014 if (neko_bcknd_device .eq. 1) then
1015 call device_memcpy(c%B, c%B_d, ntot, host_to_device, sync=.false.)
1016 call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, sync=.false.)
1017 end if
1018
1019 call c%gs_h%op(c%Binv, ntot, gs_op_add)
1020
1021 if (neko_bcknd_device .eq. 1) then
1022 call device_invcol1(c%Binv_d, ntot)
1023 call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, sync=.true.)
1024 else
1025 call invcol1(c%Binv, ntot)
1026 end if
1027
1029 if (neko_bcknd_device .eq. 1) then
1030 c%volume = device_glsum(c%B_d, ntot)
1031 else
1032 c%volume = glsum(c%B, ntot)
1033 end if
1034
1035 end subroutine coef_generate_mass
1036
1037 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
1038 class(coef_t), intent(in) :: this
1039 integer, intent(in) :: i, j, k, e, facet
1040 real(kind=rp) :: normal(3)
1041
1042 select case (facet)
1043 case(1,2)
1044 normal(1) = this%nx(j, k, facet, e)
1045 normal(2) = this%ny(j, k, facet, e)
1046 normal(3) = this%nz(j, k, facet, e)
1047 case(3,4)
1048 normal(1) = this%nx(i, k, facet, e)
1049 normal(2) = this%ny(i, k, facet, e)
1050 normal(3) = this%nz(i, k, facet, e)
1051 case(5,6)
1052 normal(1) = this%nx(i, j, facet, e)
1053 normal(2) = this%ny(i, j, facet, e)
1054 normal(3) = this%nz(i, j, facet, e)
1055 end select
1056 end function coef_get_normal
1057
1058 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1059 class(coef_t), intent(in) :: this
1060 integer, intent(in) :: i, j, k, e, facet
1061 real(kind=rp) :: area
1062
1063 select case (facet)
1064 case(1,2)
1065 area = this%area(j, k, facet, e)
1066 case(3,4)
1067 area = this%area(i, k, facet, e)
1068 case(5,6)
1069 area = this%area(i, j, facet, e)
1070 end select
1071 end function coef_get_area
1072
1073
1076 type(coef_t), intent(inout) :: coef
1077 real(kind=rp), allocatable :: a(:,:,:,:)
1078 real(kind=rp), allocatable :: b(:,:,:,:)
1079 real(kind=rp), allocatable :: c(:,:,:,:)
1080 real(kind=rp), allocatable :: dot(:,:,:,:)
1081 integer :: n, e, i, j, k, lx
1082 real(kind=rp) :: weight, len
1083 n = coef%dof%size()
1084 lx = coef%Xh%lx
1085
1086 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1087 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1088 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1089 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1090
1091 ! ds x dt
1092 do i = 1, n
1093 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1094 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1095
1096 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1097 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1098
1099 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1100 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1101 end do
1102
1103 do i = 1, n
1104 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1105 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1106 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1107 end do
1108
1109 do concurrent(e = 1:coef%msh%nelv)
1110 do concurrent(k = 1:coef%Xh%lx)
1111 do concurrent(j = 1:coef%Xh%lx)
1112 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1113 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1114 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1115 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1116 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1117 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1118 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1119 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1120 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1121 end do
1122 end do
1123 end do
1124
1125 ! dr x dt
1126 do i = 1, n
1127 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1128 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1129
1130 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1131 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1132
1133 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1134 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1135 end do
1136
1137 do i = 1, n
1138 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1139 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1140 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1141 end do
1142
1143 do concurrent(e = 1:coef%msh%nelv)
1144 do concurrent(k = 1:coef%Xh%lx)
1145 do concurrent(j = 1:coef%Xh%lx)
1146 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1147 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1148 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1149 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1150 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1151 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1152 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1153 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1154 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1155 end do
1156 end do
1157 end do
1158
1159 ! dr x ds
1160 do i = 1, n
1161 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1162 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1163
1164 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1165 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1166
1167 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1168 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1169 end do
1170
1171 do i = 1, n
1172 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1173 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1174 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1175 end do
1176
1177 do concurrent(e = 1:coef%msh%nelv)
1178 do concurrent(k = 1:coef%Xh%lx)
1179 do concurrent(j = 1:coef%Xh%lx)
1180 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1181 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1182 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1183 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1184 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1185 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1186 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1187 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1188 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1189 end do
1190 end do
1191 end do
1192
1193 ! Normalize
1194 do j = 1, size(coef%nz)
1195 len = sqrt(coef%nx(j,1,1,1)**2 + &
1196 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1197 if (len .gt. neko_eps) then
1198 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1199 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1200 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1201 end if
1202 end do
1203
1204 deallocate(dot)
1205 deallocate(c)
1206 deallocate(b)
1207 deallocate(a)
1209 if (neko_bcknd_device .eq. 1) then
1210 n = size(coef%area)
1211 call device_memcpy(coef%area, coef%area_d, n, &
1212 host_to_device, sync=.false.)
1213 call device_memcpy(coef%nx, coef%nx_d, n, &
1214 host_to_device, sync=.false.)
1215 call device_memcpy(coef%ny, coef%ny_d, n, &
1216 host_to_device, sync=.false.)
1217 call device_memcpy(coef%nz, coef%nz_d, n, &
1218 host_to_device, sync=.false.)
1219 end if
1220
1221 end subroutine coef_generate_area_and_normal
1222
1224 type(coef_t), intent(inout) :: coef
1225 real(kind=rp) :: un(3), len, d
1226 integer :: lx, ly, lz, np
1227 integer :: i, j, k, pf, pe, n, nc, ncyc
1228
1229 np = coef%msh%periodic%size
1230 lx = coef%Xh%lx
1231 ly = coef%Xh%ly
1232 lz = coef%Xh%lz
1233 ncyc = coef%cyc_msk(0) - 1
1234 nc = 1
1235 do n = 1, np
1236 pf = coef%msh%periodic%facet_el(n)%x(1)
1237 pe = coef%msh%periodic%facet_el(n)%x(2)
1238 do k = 1, lz
1239 do j = 1, ly
1240 do i = 1, lx
1241 if (index_is_on_facet(i, j, k, lx, ly, lz, pf)) then
1242 un = coef%get_normal(i, j, k, pe, pf)
1243 len = sqrt(un(1) * un(1) + un(2) * un(2))
1244 if (len .gt. neko_eps) then
1245 d = coef%dof%y(i, j, k, pe) * un(1) &
1246 - coef%dof%x(i, j, k, pe) * un(2)
1247
1248 coef%cyc_msk(nc) = linear_index(i, j, k, pe, lx, ly, lz)
1249 coef%R11(nc) = un(1) / len * sign(1.0_rp, d)
1250 coef%R12(nc) = un(2) / len * sign(1.0_rp, d)
1251 nc = nc + 1
1252 else
1253 call neko_error("x and y components of surface " // &
1254 "normals are zero. Cyclic rotations must be " // &
1255 "around z-axis.")
1256 end if
1257 end if
1258 end do
1259 end do
1260 end do
1261 end do
1262
1263 if (nc - 1 /= ncyc) then
1264 call neko_error("The number of cyclic GLL points were " // &
1265 "not estimated correctly.")
1266 end if
1267
1268 if (neko_bcknd_device .eq. 1) then
1269 call device_memcpy(coef%cyc_msk, coef%cyc_msk_d, ncyc+1, host_to_device, sync=.false.)
1270 call device_memcpy(coef%R11, coef%R11_d, ncyc, host_to_device, sync=.false.)
1271 call device_memcpy(coef%R12, coef%R12_d, ncyc, host_to_device, sync=.false.)
1272 end if
1273
1274 end subroutine coef_generate_cyclic_bc
1275
1276
1277 subroutine coef_check_cyclic(this)
1278 class(coef_t), intent(inout) :: this
1279 !DSS is applied on vector field "norm" with all zeros
1280 !except values equal to surface normals at periodic
1281 !faces. The results must sum to zero
1282 !Check happens in two passes -
1284 !no cyclic rotation is required and must be switched off.
1286 !If yields 0, cyclic rotation is required and must be
1287 !switched on. If not, then the current rotation logic
1288 !is not sufficient and must be modified.
1289 !"cyclic": true in case file invokes these checks.
1290 integer :: np, n, lx, pf, pe, i, j, k, nc, ipass, ntot, ncyc
1291 real(kind=rp) :: un(3)
1292 real(kind=rp), allocatable :: normx(:,:,:,:)
1293 real(kind=rp), allocatable :: normy(:,:,:,:)
1294 real(kind=rp), allocatable :: normz(:,:,:,:)
1295 type(c_ptr) :: normx_d = c_null_ptr
1296 type(c_ptr) :: normy_d = c_null_ptr
1297 type(c_ptr) :: normz_d = c_null_ptr
1298
1299 logical :: norm_dss = .false.
1300
1301 np = this%msh%periodic%size
1302 lx = this%Xh%lx
1303 ntot = this%dof%size()
1304 ncyc = np*lx*lx
1305
1306 if (.not. this%cyclic) return
1307
1308 if (np .eq. 0) then
1309 call neko_error("There are no periodic boundaries. " // &
1310 "Switch cyclic off in the case file.")
1311 end if
1312
1313 allocate(normx(lx, lx, lx, this%msh%nelv))
1314 allocate(normy(lx, lx, lx, this%msh%nelv))
1315 allocate(normz(lx, lx, lx, this%msh%nelv))
1316
1317 if (neko_bcknd_device .eq. 1) then
1318 call device_map(normx, normx_d, ntot)
1319 call device_map(normy, normy_d, ntot)
1320 call device_map(normz, normz_d, ntot)
1321 end if
1322
1323 do ipass = 1, 2
1324 norm_dss = .false.
1325 call rzero(normx, ntot)
1326 call rzero(normy, ntot)
1327 call rzero(normz, ntot)
1328
1329 if (ipass .eq. 2) then
1330 call coef_generate_cyclic_bc(this)
1331 end if
1332
1333 nc = 1
1334 do n = 1, np
1335 pf = this%msh%periodic%facet_el(n)%x(1)
1336 pe = this%msh%periodic%facet_el(n)%x(2)
1337 do k = 1, lx
1338 do j = 1, lx
1339 do i = 1, lx
1340 if (index_is_on_facet(i, j, k, lx, lx, lx, pf)) then
1341 un = this%get_normal(i, j, k, pe, pf)
1342 normx(i,j,k,pe) = un(1) * this%R11(nc) &
1343 + un(2) * this%R12(nc)
1344
1345 normy(i,j,k,pe) =-un(1) * this%R12(nc) &
1346 + un(2) * this%R11(nc)
1347
1348 normz(i,j,k,pe) = un(3)
1349
1350 nc = nc + 1
1351 end if
1352 end do
1353 end do
1354 end do
1355 end do
1356
1357 if (neko_bcknd_device .eq. 1) then
1358 call device_memcpy(normx, normx_d, ntot, host_to_device, sync=.false.)
1359 call device_memcpy(normy, normy_d, ntot, host_to_device, sync=.false.)
1360 call device_memcpy(normz, normz_d, ntot, host_to_device, sync=.false.)
1361 end if
1362
1363 call this%gs_h%op(normx, ntot, gs_op_add)
1364 call this%gs_h%op(normy, ntot, gs_op_add)
1365 call this%gs_h%op(normz, ntot, gs_op_add)
1366
1367 if (neko_bcknd_device .eq. 1) then
1368 call device_absval(normx_d, ntot)
1369 call device_absval(normy_d, ntot)
1370 call device_absval(normz_d, ntot)
1371 norm_dss = device_glsum(normx_d, ntot)/ntot .lt. 100.0*neko_eps .and. &
1372 device_glsum(normy_d, ntot)/ntot .lt. 100.0*neko_eps .and. &
1373 device_glsum(normz_d, ntot)/ntot .lt. 100.0*neko_eps
1374 else
1375 call absval(normx, ntot)
1376 call absval(normy, ntot)
1377 call absval(normz, ntot)
1378 norm_dss = glsum(normx, ntot)/ntot .lt. 100*neko_eps .and. &
1379 glsum(normy, ntot)/ntot .lt. 100*neko_eps .and. &
1380 glsum(normz, ntot)/ntot .lt. 100*neko_eps
1381 end if
1382
1383 if (ipass .eq. 1 .and. norm_dss) then
1384 call neko_error("Cyclic rotation is not required. " // &
1385 "Switch it off in case file.")
1386 !else if (ipass .eq. 1 .and. norm_dss .eqv. false)
1387 !wait for ipass=2 to check if current rotation logic is sufficient
1388 else if (ipass .eq. 2 .and. .not. norm_dss) then
1389 call neko_error("Cylic rotation is required, but " // &
1390 "rotation logic must be modified.")
1391 !if (ipass .eq. 2 .and. norm_dss)
1392 !current logic is sufficient, proceed.
1393 end if
1394 end do
1395 deallocate(normx)
1396 deallocate(normy)
1397 deallocate(normz)
1398
1399 if (neko_bcknd_device .eq. 1) then
1400 call device_free(normx_d)
1401 call device_free(normy_d)
1402 call device_free(normz_d)
1403 end if
1404
1405 end subroutine coef_check_cyclic
1406
1407end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:77
Copy data between host and device (or device and device)
Definition device.F90:71
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:883
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:405
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1059
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:1038
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:725
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1076
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:172
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:218
subroutine coef_check_cyclic(this)
Definition coef.f90:1278
subroutine coef_generate_cyclic_bc(coef)
Definition coef.f90:1224
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:999
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
real(kind=rp) function, public device_glsum(a_d, n, strm)
Sum a vector of length n.
subroutine, public device_invcol1(a_d, n, strm)
Invert a vector .
subroutine, public device_rone(a_d, n, strm)
Set all elements to one.
subroutine, public device_absval(a_d, n, strm)
Device abstraction, common interface for various accelerators.
Definition device.F90:34
integer, parameter, public host_to_device
Definition device.F90:47
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:219
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:639
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:881
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:238
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:499
subroutine, public absval(a, n)
Take the absolute value of an array.
Definition math.f90:1372
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:958
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:614
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:579
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:249
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:205
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Utilities.
Definition utils.f90:35
pure logical function, public index_is_on_facet(i, j, k, lx, ly, lz, facet)
Definition utils.f90:262
pure integer function, public linear_index(i, j, k, l, lx, ly, lz)
Compute the address of a (i,j,k,l) array with sizes (1:lx, 1:ly, 1:lz, :)
Definition utils.f90:237
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:58
The function space for the SEM solution fields.
Definition space.f90:63