Neko 0.9.99
A portable framework for high-order spectral element flow simulations
Loading...
Searching...
No Matches
coef.f90
Go to the documentation of this file.
1! Copyright (c) 2020-2024, The Neko Authors
2! All rights reserved.
3!
4! Redistribution and use in source and binary forms, with or without
5! modification, are permitted provided that the following conditions
6! are met:
7!
8! * Redistributions of source code must retain the above copyright
9! notice, this list of conditions and the following disclaimer.
10!
11! * Redistributions in binary form must reproduce the above
12! copyright notice, this list of conditions and the following
13! disclaimer in the documentation and/or other materials provided
14! with the distribution.
15!
16! * Neither the name of the authors nor the names of its
17! contributors may be used to endorse or promote products derived
18! from this software without specific prior written permission.
19!
20! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31! POSSIBILITY OF SUCH DAMAGE.
32!
34module coefs
35 use gather_scatter, only : gs_t
36 use gs_ops, only : gs_op_add
38 use num_types, only : rp
39 use dofmap, only : dofmap_t
40 use space, only: space_t
41 use math, only : rone, invcol1, addcol3, subcol3, copy, &
43 use mesh, only : mesh_t
47 use mxm_wrapper, only : mxm
48 use device
49 use, intrinsic :: iso_c_binding
50 implicit none
51 private
52
55 type, public :: coef_t
57 real(kind=rp), allocatable :: g11(:,:,:,:)
59 real(kind=rp), allocatable :: g22(:,:,:,:)
61 real(kind=rp), allocatable :: g33(:,:,:,:)
63 real(kind=rp), allocatable :: g12(:,:,:,:)
65 real(kind=rp), allocatable :: g13(:,:,:,:)
67 real(kind=rp), allocatable :: g23(:,:,:,:)
68
69 real(kind=rp), allocatable :: mult(:,:,:,:)
74 real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
75 real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
76 real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
80 real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
81 real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
82 real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
83
84 real(kind=rp), allocatable :: h1(:,:,:,:)
85 real(kind=rp), allocatable :: h2(:,:,:,:)
86 logical :: ifh2
87
88 real(kind=rp), allocatable :: jac(:,:,:,:)
89 real(kind=rp), allocatable :: jacinv(:,:,:,:)
90 real(kind=rp), allocatable :: b(:,:,:,:)
91 real(kind=rp), allocatable :: binv(:,:,:,:)
92
93 real(kind=rp), allocatable :: area(:,:,:,:)
94 real(kind=rp), allocatable :: nx(:,:,:,:)
95 real(kind=rp), allocatable :: ny(:,:,:,:)
96 real(kind=rp), allocatable :: nz(:,:,:,:)
98
99 real(kind=rp) :: volume
100
101 type(space_t), pointer :: xh => null()
102 type(mesh_t), pointer :: msh => null()
103 type(dofmap_t), pointer :: dof => null()
104 type(gs_t), pointer :: gs_h=> null()
105
106 !
107 ! Device pointers (if present)
108 !
109
110 type(c_ptr) :: g11_d = c_null_ptr
111 type(c_ptr) :: g22_d = c_null_ptr
112 type(c_ptr) :: g33_d = c_null_ptr
113 type(c_ptr) :: g12_d = c_null_ptr
114 type(c_ptr) :: g13_d = c_null_ptr
115 type(c_ptr) :: g23_d = c_null_ptr
116 type(c_ptr) :: dxdr_d = c_null_ptr
117 type(c_ptr) :: dydr_d = c_null_ptr
118 type(c_ptr) :: dzdr_d = c_null_ptr
119 type(c_ptr) :: dxds_d = c_null_ptr
120 type(c_ptr) :: dyds_d = c_null_ptr
121 type(c_ptr) :: dzds_d = c_null_ptr
122 type(c_ptr) :: dxdt_d = c_null_ptr
123 type(c_ptr) :: dydt_d = c_null_ptr
124 type(c_ptr) :: dzdt_d = c_null_ptr
125 type(c_ptr) :: drdx_d = c_null_ptr
126 type(c_ptr) :: drdy_d = c_null_ptr
127 type(c_ptr) :: drdz_d = c_null_ptr
128 type(c_ptr) :: dsdx_d = c_null_ptr
129 type(c_ptr) :: dsdy_d = c_null_ptr
130 type(c_ptr) :: dsdz_d = c_null_ptr
131 type(c_ptr) :: dtdx_d = c_null_ptr
132 type(c_ptr) :: dtdy_d = c_null_ptr
133 type(c_ptr) :: dtdz_d = c_null_ptr
134 type(c_ptr) :: mult_d = c_null_ptr
135 type(c_ptr) :: h1_d = c_null_ptr
136 type(c_ptr) :: h2_d = c_null_ptr
137 type(c_ptr) :: jac_d = c_null_ptr
138 type(c_ptr) :: jacinv_d = c_null_ptr
139 type(c_ptr) :: b_d = c_null_ptr
140 type(c_ptr) :: binv_d = c_null_ptr
141 type(c_ptr) :: area_d = c_null_ptr
142 type(c_ptr) :: nx_d = c_null_ptr
143 type(c_ptr) :: ny_d = c_null_ptr
144 type(c_ptr) :: nz_d = c_null_ptr
145
146
147 contains
148 procedure, private, pass(this) :: init_empty => coef_init_empty
149 procedure, private, pass(this) :: init_all => coef_init_all
150 procedure, pass(this) :: free => coef_free
151 procedure, pass(this) :: get_normal => coef_get_normal
152 procedure, pass(this) :: get_area => coef_get_area
153 generic :: init => init_empty, init_all
154 end type coef_t
155
156contains
157
159 subroutine coef_init_empty(this, Xh, msh)
160 class(coef_t), intent(inout) :: this
161 type(space_t), intent(inout), target :: Xh
162 type(mesh_t), intent(inout), target :: msh
163 integer :: n
164 call this%free()
165 this%msh => msh
166 this%Xh => xh
167
168 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
169 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
170 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
171
172 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
173 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
174 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
175
176 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
177 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
178 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
179
180
181 !
182 ! Setup device memory (if present)
183 !
184
185 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
186 if (neko_bcknd_device .eq. 1) then
187
188 call device_map(this%drdx, this%drdx_d, n)
189 call device_map(this%drdy, this%drdy_d, n)
190 call device_map(this%drdz, this%drdz_d, n)
191
192 call device_map(this%dsdx, this%dsdx_d, n)
193 call device_map(this%dsdy, this%dsdy_d, n)
194 call device_map(this%dsdz, this%dsdz_d, n)
195
196 call device_map(this%dtdx, this%dtdx_d, n)
197 call device_map(this%dtdy, this%dtdy_d, n)
198 call device_map(this%dtdz, this%dtdz_d, n)
199
200 end if
201
202 end subroutine coef_init_empty
203
205 subroutine coef_init_all(this, gs_h)
206 class(coef_t), intent(inout) :: this
207 type(gs_t), intent(inout), target :: gs_h
208 integer :: n, m
209 call this%free()
210
211 this%msh => gs_h%dofmap%msh
212 this%Xh => gs_h%dofmap%Xh
213 this%dof => gs_h%dofmap
214 this%gs_h => gs_h
215
216 !
217 ! Allocate arrays for geometric data
218 !
220 allocate(this%G11(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
221 allocate(this%G22(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
222 allocate(this%G33(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
223 allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
224 allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
225 allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
226
227 allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
228 allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
229 allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
230
231 allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
232 allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
233 allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
234
235 allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
236 allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
237 allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
238
239 allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
240 allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
241 allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
242
243 allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
244 allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
245 allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
246
247 allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
248 allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
249 allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
250
251 allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
252 allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
253
254 allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
255 allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
256 allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
257 allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
258
259 allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
260 allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
261
262 allocate(this%h1(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
263 allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
264
265 allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
266
267
268 !
269 ! Setup device memory (if present)
270 !
271
272 n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
273 if (neko_bcknd_device .eq. 1) then
274 call device_map(this%G11, this%G11_d, n)
275 call device_map(this%G22, this%G22_d, n)
276 call device_map(this%G33, this%G33_d, n)
277 call device_map(this%G12, this%G12_d, n)
278 call device_map(this%G13, this%G13_d, n)
279 call device_map(this%G23, this%G23_d, n)
280
281 call device_map(this%dxdr, this%dxdr_d, n)
282 call device_map(this%dydr, this%dydr_d, n)
283 call device_map(this%dzdr, this%dzdr_d, n)
284
285 call device_map(this%dxds, this%dxds_d, n)
286 call device_map(this%dyds, this%dyds_d, n)
287 call device_map(this%dzds, this%dzds_d, n)
288
289 call device_map(this%dxdt, this%dxdt_d, n)
290 call device_map(this%dydt, this%dydt_d, n)
291 call device_map(this%dzdt, this%dzdt_d, n)
292
293 call device_map(this%drdx, this%drdx_d, n)
294 call device_map(this%drdy, this%drdy_d, n)
295 call device_map(this%drdz, this%drdz_d, n)
296
297 call device_map(this%dsdx, this%dsdx_d, n)
298 call device_map(this%dsdy, this%dsdy_d, n)
299 call device_map(this%dsdz, this%dsdz_d, n)
300
301 call device_map(this%dtdx, this%dtdx_d, n)
302 call device_map(this%dtdy, this%dtdy_d, n)
303 call device_map(this%dtdz, this%dtdz_d, n)
304
305 call device_map(this%mult, this%mult_d, n)
306 call device_map(this%h1, this%h1_d, n)
307 call device_map(this%h2, this%h2_d, n)
308
309 call device_map(this%jac, this%jac_d, n)
310 call device_map(this%jacinv, this%jacinv_d, n)
311 call device_map(this%B, this%B_d, n)
312 call device_map(this%Binv, this%Binv_d, n)
313
314 m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
315
316 call device_map(this%area, this%area_d, m)
317 call device_map(this%nx, this%nx_d, m)
318 call device_map(this%ny, this%ny_d, m)
319 call device_map(this%nz, this%nz_d, m)
320
321 end if
322
323 call coef_generate_dxyzdrst(this)
324
325 call coef_generate_geo(this)
326
328
329 call coef_generate_mass(this)
330
331
332 ! This is a placeholder, just for now
333 ! We can probably find a prettier solution
334 if (neko_bcknd_device .eq. 1) then
335 call device_rone(this%h1_d, n)
336 call device_rone(this%h2_d, n)
337 call device_memcpy(this%h1, this%h1_d, n, &
338 device_to_host, sync=.false.)
339 call device_memcpy(this%h2, this%h2_d, n, &
340 device_to_host, sync=.false.)
341 else
342 call rone(this%h1,n)
343 call rone(this%h2,n)
344 end if
345
346 this%ifh2 = .false.
347
348 !
349 ! Set up multiplicity
350 !
351 if (neko_bcknd_device .eq. 1) then
352 call device_rone(this%mult_d, n)
353 else
354 call rone(this%mult, n)
355 end if
356
357 call gs_h%op(this%mult, n, gs_op_add)
358
359 if (neko_bcknd_device .eq. 1) then
360 call device_invcol1(this%mult_d, n)
361 call device_memcpy(this%mult, this%mult_d, n, &
362 device_to_host, sync=.true.)
363 else
364 call invcol1(this%mult, n)
365 end if
366
367 end subroutine coef_init_all
368
370 subroutine coef_free(this)
371 class(coef_t), intent(inout) :: this
372
373 if (allocated(this%G11)) then
374 deallocate(this%G11)
375 end if
376
377 if (allocated(this%G22)) then
378 deallocate(this%G22)
379 end if
380
381 if (allocated(this%G33)) then
382 deallocate(this%G33)
383 end if
384
385 if (allocated(this%G12)) then
386 deallocate(this%G12)
387 end if
388
389 if (allocated(this%G13)) then
390 deallocate(this%G13)
391 end if
392
393 if (allocated(this%G23)) then
394 deallocate(this%G23)
395 end if
396
397 if (allocated(this%mult)) then
398 deallocate(this%mult)
399 end if
400
401 if (allocated(this%B)) then
402 deallocate(this%B)
403 end if
404
405 if (allocated(this%Binv)) then
406 deallocate(this%Binv)
407 end if
408
409 if(allocated(this%dxdr)) then
410 deallocate(this%dxdr)
411 end if
412
413 if(allocated(this%dxds)) then
414 deallocate(this%dxds)
415 end if
416
417 if(allocated(this%dxdt)) then
418 deallocate(this%dxdt)
419 end if
420
421 if(allocated(this%dydr)) then
422 deallocate(this%dydr)
423 end if
424
425 if(allocated(this%dyds)) then
426 deallocate(this%dyds)
427 end if
428
429 if(allocated(this%dydt)) then
430 deallocate(this%dydt)
431 end if
432
433 if(allocated(this%dzdr)) then
434 deallocate(this%dzdr)
435 end if
436
437 if(allocated(this%dzds)) then
438 deallocate(this%dzds)
439 end if
440
441 if(allocated(this%dzdt)) then
442 deallocate(this%dzdt)
443 end if
444
445 if(allocated(this%drdx)) then
446 deallocate(this%drdx)
447 end if
448
449 if(allocated(this%dsdx)) then
450 deallocate(this%dsdx)
451 end if
452
453 if(allocated(this%dtdx)) then
454 deallocate(this%dtdx)
455 end if
456
457 if(allocated(this%drdy)) then
458 deallocate(this%drdy)
459 end if
460
461 if(allocated(this%dsdy)) then
462 deallocate(this%dsdy)
463 end if
464
465 if(allocated(this%dtdy)) then
466 deallocate(this%dtdy)
467 end if
468
469 if(allocated(this%drdz)) then
470 deallocate(this%drdz)
471 end if
472
473 if(allocated(this%dsdz)) then
474 deallocate(this%dsdz)
475 end if
476
477 if(allocated(this%dtdz)) then
478 deallocate(this%dtdz)
479 end if
480
481 if(allocated(this%jac)) then
482 deallocate(this%jac)
483 end if
484
485 if(allocated(this%jacinv)) then
486 deallocate(this%jacinv)
487 end if
488
489 if(allocated(this%h1)) then
490 deallocate(this%h1)
491 end if
492
493 if(allocated(this%h2)) then
494 deallocate(this%h2)
495 end if
496
497 if (allocated(this%area)) then
498 deallocate(this%area)
499 end if
500
501 if (allocated(this%nx)) then
502 deallocate(this%nx)
503 end if
504
505 if (allocated(this%ny)) then
506 deallocate(this%ny)
507 end if
508
509 if (allocated(this%nz)) then
510 deallocate(this%nz)
511 end if
512
513
514 nullify(this%msh)
515 nullify(this%Xh)
516 nullify(this%dof)
517
518 !
519 ! Cleanup the device (if present)
520 !
521
522 if (c_associated(this%G11_d)) then
523 call device_free(this%G11_d)
524 end if
525
526 if (c_associated(this%G22_d)) then
527 call device_free(this%G22_d)
528 end if
529
530 if (c_associated(this%G33_d)) then
531 call device_free(this%G33_d)
532 end if
533
534 if (c_associated(this%G12_d)) then
535 call device_free(this%G12_d)
536 end if
537
538 if (c_associated(this%G13_d)) then
539 call device_free(this%G13_d)
540 end if
541
542 if (c_associated(this%G23_d)) then
543 call device_free(this%G23_d)
544 end if
545
546 if (c_associated(this%dxdr_d)) then
547 call device_free(this%dxdr_d)
548 end if
549
550 if (c_associated(this%dydr_d)) then
551 call device_free(this%dydr_d)
552 end if
553
554 if (c_associated(this%dzdr_d)) then
555 call device_free(this%dzdr_d)
556 end if
557
558 if (c_associated(this%dxds_d)) then
559 call device_free(this%dxds_d)
560 end if
561
562 if (c_associated(this%dyds_d)) then
563 call device_free(this%dyds_d)
564 end if
565
566 if (c_associated(this%dzds_d)) then
567 call device_free(this%dzds_d)
568 end if
569
570 if (c_associated(this%dxdt_d)) then
571 call device_free(this%dxdt_d)
572 end if
573
574 if (c_associated(this%dydt_d)) then
575 call device_free(this%dydt_d)
576 end if
577
578 if (c_associated(this%dzdt_d)) then
579 call device_free(this%dzdt_d)
580 end if
581
582 if (c_associated(this%drdx_d)) then
583 call device_free(this%drdx_d)
584 end if
585
586 if (c_associated(this%drdy_d)) then
587 call device_free(this%drdy_d)
588 end if
589
590 if (c_associated(this%drdz_d)) then
591 call device_free(this%drdz_d)
592 end if
593
594 if (c_associated(this%dsdx_d)) then
595 call device_free(this%dsdx_d)
596 end if
597
598 if (c_associated(this%dsdy_d)) then
599 call device_free(this%dsdy_d)
600 end if
601
602 if (c_associated(this%dsdz_d)) then
603 call device_free(this%dsdz_d)
604 end if
605
606 if (c_associated(this%dtdx_d)) then
607 call device_free(this%dtdx_d)
608 end if
609
610 if (c_associated(this%dtdy_d)) then
611 call device_free(this%dtdy_d)
612 end if
613
614 if (c_associated(this%dtdz_d)) then
615 call device_free(this%dtdz_d)
616 end if
617
618 if (c_associated(this%mult_d)) then
619 call device_free(this%mult_d)
620 end if
621
622 if (c_associated(this%h1_d)) then
623 call device_free(this%h1_d)
624 end if
625
626 if (c_associated(this%h2_d)) then
627 call device_free(this%h2_d)
628 end if
629
630 if (c_associated(this%jac_d)) then
631 call device_free(this%jac_d)
632 end if
633
634 if (c_associated(this%jacinv_d)) then
635 call device_free(this%jacinv_d)
636 end if
637
638 if (c_associated(this%B_d)) then
639 call device_free(this%B_d)
640 end if
641
642 if (c_associated(this%Binv_d)) then
643 call device_free(this%Binv_d)
644 end if
645
646 if (c_associated(this%area_d)) then
647 call device_free(this%area_d)
648 end if
649
650 if (c_associated(this%nx_d)) then
651 call device_free(this%nx_d)
652 end if
653
654 if (c_associated(this%ny_d)) then
655 call device_free(this%ny_d)
656 end if
657
658 if (c_associated(this%nz_d)) then
659 call device_free(this%nz_d)
660 end if
661
662
663 end subroutine coef_free
664
666 type(coef_t), intent(inout) :: c
667 integer :: e,i,lxy,lyz,ntot
668
669 lxy=c%Xh%lx*c%Xh%ly
670 lyz=c%Xh%ly*c%Xh%lz
671 ntot = c%dof%size()
672
673 associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
674 dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
675 dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
676 dxdr => c%dxdr, dydr => c%dydr, dzdr => c%dzdr, &
677 dxds => c%dxds, dyds => c%dyds, dzds => c%dzds, &
678 dxdt => c%dxdt, dydt => c%dydt, dzdt => c%dzdt, &
679 dx => c%Xh%dx, dy => c%Xh%dy, dz => c%Xh%dz, &
680 x => c%dof%x, y => c%dof%y, z => c%dof%z, &
681 lx => c%Xh%lx, ly => c%Xh%ly, lz => c%Xh%lz, &
682 dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
683 jacinv => c%jacinv, jac => c%jac)
684
685 if (neko_bcknd_device .eq. 1) then
686
687 call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
688 c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
689 c%dxdr_d, c%dydr_d, c%dzdr_d, c%dxds_d, c%dyds_d, c%dzds_d, &
690 c%dxdt_d, c%dydt_d, c%dzdt_d, c%Xh%dx_d, c%Xh%dy_d, c%Xh%dz_d, &
691 c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
692 c%Xh%lx, c%msh%nelv)
693
694 call device_memcpy(dxdr, c%dxdr_d, ntot, device_to_host, sync=.false.)
695 call device_memcpy(dydr, c%dydr_d, ntot, device_to_host, sync=.false.)
696 call device_memcpy(dzdr, c%dzdr_d, ntot, device_to_host, sync=.false.)
697 call device_memcpy(dxds, c%dxds_d, ntot, device_to_host, sync=.false.)
698 call device_memcpy(dyds, c%dyds_d, ntot, device_to_host, sync=.false.)
699 call device_memcpy(dzds, c%dzds_d, ntot, device_to_host, sync=.false.)
700 call device_memcpy(dxdt, c%dxdt_d, ntot, device_to_host, sync=.false.)
701 call device_memcpy(dydt, c%dydt_d, ntot, device_to_host, sync=.false.)
702 call device_memcpy(dzdt, c%dzdt_d, ntot, device_to_host, sync=.false.)
703 call device_memcpy(drdx, c%drdx_d, ntot, device_to_host, sync=.false.)
704 call device_memcpy(drdy, c%drdy_d, ntot, device_to_host, sync=.false.)
705 call device_memcpy(drdz, c%drdz_d, ntot, device_to_host, sync=.false.)
706 call device_memcpy(dsdx, c%dsdx_d, ntot, device_to_host, sync=.false.)
707 call device_memcpy(dsdy, c%dsdy_d, ntot, device_to_host, sync=.false.)
708 call device_memcpy(dsdz, c%dsdz_d, ntot, device_to_host, sync=.false.)
709 call device_memcpy(dtdx, c%dtdx_d, ntot, device_to_host, sync=.false.)
710 call device_memcpy(dtdy, c%dtdy_d, ntot, device_to_host, sync=.false.)
711 call device_memcpy(dtdz, c%dtdz_d, ntot, device_to_host, sync=.false.)
712 call device_memcpy(jac, c%jac_d, ntot, device_to_host, sync=.false.)
713 call device_memcpy(jacinv, c%jacinv_d, ntot, &
714 device_to_host, sync=.true.)
715
716 else
717 do e = 1, c%msh%nelv
718 call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
719 call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
720 call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
721
722 do i = 1, lz
723 call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
724 call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
725 call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
726 end do
727
728 ! We actually take 2d into account, wow, need to do that for the rest.
729 if(c%msh%gdim .eq. 3) then
730 call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
731 call mxm(y(1,1,1,e), lxy, dzt, lz, dydt(1,1,1,e), lz)
732 call mxm(z(1,1,1,e), lxy, dzt, lz, dzdt(1,1,1,e), lz)
733 else
734 call rzero(dxdt(1,1,1,e), lxy)
735 call rzero(dydt(1,1,1,e), lxy)
736 call rone(dzdt(1,1,1,e), lxy)
737 end if
738 end do
739
740 if (c%msh%gdim .eq. 2) then
741 call rzero (jac, ntot)
742 call addcol3 (jac, dxdr, dyds, ntot)
743 call subcol3 (jac, dxds, dydr, ntot)
744 call copy (drdx, dyds, ntot)
745 call copy (drdy, dxds, ntot)
746 call chsign (drdy, ntot)
747 call copy (dsdx, dydr, ntot)
748 call chsign (dsdx, ntot)
749 call copy (dsdy, dxdr, ntot)
750 call rzero (drdz, ntot)
751 call rzero (dsdz, ntot)
752 call rone (dtdz, ntot)
753 else
754
755 do i = 1, ntot
756 c%jac(i, 1, 1, 1) = 0.0_rp
757 end do
758
759 do i = 1, ntot
760 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) &
761 * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
762
763 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdt(i, 1, 1, 1) &
764 * c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
765
766 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxds(i, 1, 1, 1) &
767 * c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
768 end do
769
770 do i = 1, ntot
771 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdr(i, 1, 1, 1) &
772 * c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) )
773
774 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxds(i, 1, 1, 1) &
775 * c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
776
777 c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) - ( c%dxdt(i, 1, 1, 1) &
778 * c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) )
779 end do
780
781 do i = 1, ntot
782 c%drdx(i, 1, 1, 1) = c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
783 - c%dydt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
784
785 c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
786 - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
787
788 c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
789 - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
790 end do
791
792 do i = 1, ntot
793 c%dsdx(i, 1, 1, 1) = c%dydt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
794 - c%dydr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
795
796 c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
797 - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
798
799 c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
800 - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
801 end do
802
803 do i = 1, ntot
804 c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
805 - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
806
807 c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
808 - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
809
810 c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
811 - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
812 end do
813
814 end if
815 call invers2(jacinv, jac, ntot)
816 end if
817 end associate
818
819 end subroutine coef_generate_dxyzdrst
820
823 subroutine coef_generate_geo(c)
824 type(coef_t), intent(inout) :: c
825 integer :: e, i, lxyz, ntot
826
827 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
828 ntot = c%dof%size()
829
830 if (neko_bcknd_device .eq. 1) then
831
832 call device_coef_generate_geo(c%G11_d, c%G12_d, c%G13_d, &
833 c%G22_d, c%G23_d, c%G33_d, &
834 c%drdx_d, c%drdy_d, c%drdz_d, &
835 c%dsdx_d, c%dsdy_d, c%dsdz_d, &
836 c%dtdx_d, c%dtdy_d, c%dtdz_d, &
837 c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
838 c%Xh%lx, c%msh%gdim)
839
840 call device_memcpy(c%G11, c%G11_d, ntot, device_to_host, sync=.false.)
841 call device_memcpy(c%G22, c%G22_d, ntot, device_to_host, sync=.false.)
842 call device_memcpy(c%G33, c%G33_d, ntot, device_to_host, sync=.false.)
843 call device_memcpy(c%G12, c%G12_d, ntot, device_to_host, sync=.false.)
844 call device_memcpy(c%G13, c%G13_d, ntot, device_to_host, sync=.false.)
845 call device_memcpy(c%G23, c%G23_d, ntot, device_to_host, sync=.true.)
846
847 else
848 if(c%msh%gdim .eq. 2) then
849
850 do i = 1, ntot
851 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
852 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
853
854 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
855 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
856
857 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
858 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
859 end do
860
861 do i = 1, ntot
862 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
863 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
864 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
865 c%G33(i, 1, 1, 1) = 0.0_rp
866 c%G13(i, 1, 1, 1) = 0.0_rp
867 c%G23(i, 1, 1, 1) = 0.0_rp
868 end do
869
870 do concurrent(e = 1:c%msh%nelv)
871 do concurrent(i = 1:lxyz)
872 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
873 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
874 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
875 end do
876 end do
877
878 else
879
880 do i = 1, ntot
881 c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
882 + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) &
883 + c%drdz(i, 1, 1, 1) * c%drdz(i, 1, 1, 1)
884
885 c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
886 + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
887 + c%dsdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
888
889 c%G33(i, 1, 1, 1) = c%dtdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
890 + c%dtdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
891 + c%dtdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
892 end do
893
894 do i = 1, ntot
895 c%G11(i, 1, 1, 1) = c%G11(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
896 c%G22(i, 1, 1, 1) = c%G22(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
897 c%G33(i, 1, 1, 1) = c%G33(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
898 end do
899
900 do i = 1, ntot
901 c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
902 + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) &
903 + c%drdz(i, 1, 1, 1) * c%dsdz(i, 1, 1, 1)
904
905 c%G13(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
906 + c%drdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
907 + c%drdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
908
909 c%G23(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dtdx(i, 1, 1, 1) &
910 + c%dsdy(i, 1, 1, 1) * c%dtdy(i, 1, 1, 1) &
911 + c%dsdz(i, 1, 1, 1) * c%dtdz(i, 1, 1, 1)
912 end do
913
914 do i = 1, ntot
915 c%G12(i, 1, 1, 1) = c%G12(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
916 c%G13(i, 1, 1, 1) = c%G13(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
917 c%G23(i, 1, 1, 1) = c%G23(i, 1, 1, 1) * c%jacinv(i, 1, 1, 1)
918 end do
919
920 do concurrent(e = 1:c%msh%nelv)
921 do concurrent(i = 1:lxyz)
922 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
923 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
924 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
925
926 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
927 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
928 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
929 end do
930 end do
931
932 end if
933 end if
934
935 end subroutine coef_generate_geo
936
939 subroutine coef_generate_mass(c)
940 type(coef_t), intent(inout) :: c
941 integer :: e, i, lxyz, ntot
942
943 lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
944 ntot = c%dof%size()
945
947 do concurrent(e = 1:c%msh%nelv)
948 ! Here we need to handle things differently for axis symmetric elements
949 do concurrent(i = 1:lxyz)
950 c%B(i,1,1,e) = c%jac(i,1,1,e) * c%Xh%w3(i,1,1)
951 c%Binv(i,1,1,e) = c%B(i,1,1,e)
952 end do
953 end do
954
955 if (neko_bcknd_device .eq. 1) then
956 call device_memcpy(c%B, c%B_d, ntot, host_to_device, sync=.false.)
957 call device_memcpy(c%Binv, c%Binv_d, ntot, host_to_device, sync=.false.)
958 end if
959
960 call c%gs_h%op(c%Binv, ntot, gs_op_add)
961
962 if (neko_bcknd_device .eq. 1) then
963 call device_invcol1(c%Binv_d, ntot)
964 call device_memcpy(c%Binv, c%Binv_d, ntot, device_to_host, sync=.true.)
965 else
966 call invcol1(c%Binv, ntot)
967 end if
968
970 if (neko_bcknd_device .eq. 1) then
971 c%volume = device_glsum(c%B_d, ntot)
972 else
973 c%volume = glsum(c%B, ntot)
974 end if
975
976 end subroutine coef_generate_mass
977
978 pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
979 class(coef_t), intent(in) :: this
980 integer, intent(in) :: i, j, k, e, facet
981 real(kind=rp) :: normal(3)
982
983 select case (facet)
984 case(1,2)
985 normal(1) = this%nx(j, k, facet, e)
986 normal(2) = this%ny(j, k, facet, e)
987 normal(3) = this%nz(j, k, facet, e)
988 case(3,4)
989 normal(1) = this%nx(i, k, facet, e)
990 normal(2) = this%ny(i, k, facet, e)
991 normal(3) = this%nz(i, k, facet, e)
992 case(5,6)
993 normal(1) = this%nx(i, j, facet, e)
994 normal(2) = this%ny(i, j, facet, e)
995 normal(3) = this%nz(i, j, facet, e)
996 end select
997 end function coef_get_normal
998
999 pure function coef_get_area(this, i, j, k, e, facet) result(area)
1000 class(coef_t), intent(in) :: this
1001 integer, intent(in) :: i, j, k, e, facet
1002 real(kind=rp) :: area
1003
1004 select case (facet)
1005 case(1,2)
1006 area = this%area(j, k, facet, e)
1007 case(3,4)
1008 area = this%area(i, k, facet, e)
1009 case(5,6)
1010 area = this%area(i, j, facet, e)
1011 end select
1012 end function coef_get_area
1013
1014
1017 type(coef_t), intent(inout) :: coef
1018 real(kind=rp), allocatable :: a(:,:,:,:)
1019 real(kind=rp), allocatable :: b(:,:,:,:)
1020 real(kind=rp), allocatable :: c(:,:,:,:)
1021 real(kind=rp), allocatable :: dot(:,:,:,:)
1022 integer :: n, e, i, j, k, lx
1023 real(kind=rp) :: weight, len
1024 n = coef%dof%size()
1025 lx = coef%Xh%lx
1026
1027 allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1028 allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1029 allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1030 allocate(dot(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
1031
1032 ! ds x dt
1033 do i = 1, n
1034 a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1035 - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1036
1037 b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1038 - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1039
1040 c(i, 1, 1, 1) = coef%dxds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1041 - coef%dyds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1042 end do
1043
1044 do i = 1, n
1045 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1046 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1047 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1048 end do
1049
1050 do concurrent(e = 1:coef%msh%nelv)
1051 do concurrent(k = 1:coef%Xh%lx)
1052 do concurrent(j = 1:coef%Xh%lx)
1053 weight = coef%Xh%wy(j) * coef%Xh%wz(k)
1054 coef%area(j, k, 2, e) = sqrt(dot(lx, j, k, e)) * weight
1055 coef%area(j, k, 1, e) = sqrt(dot(1, j, k, e)) * weight
1056 coef%nx(j,k, 1, e) = -a(1, j, k, e)
1057 coef%nx(j,k, 2, e) = a(lx, j, k, e)
1058 coef%ny(j,k, 1, e) = -b(1, j, k, e)
1059 coef%ny(j,k, 2, e) = b(lx, j, k, e)
1060 coef%nz(j,k, 1, e) = -c(1, j, k, e)
1061 coef%nz(j,k, 2, e) = c(lx, j, k, e)
1062 end do
1063 end do
1064 end do
1065
1066 ! dr x dt
1067 do i = 1, n
1068 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
1069 - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
1070
1071 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
1072 - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
1073
1074 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) &
1075 - coef%dydr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1)
1076 end do
1077
1078 do i = 1, n
1079 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1080 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1081 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1082 end do
1083
1084 do concurrent(e = 1:coef%msh%nelv)
1085 do concurrent(k = 1:coef%Xh%lx)
1086 do concurrent(j = 1:coef%Xh%lx)
1087 weight = coef%Xh%wx(j) * coef%Xh%wz(k)
1088 coef%area(j, k, 3, e) = sqrt(dot(j, 1, k, e)) * weight
1089 coef%area(j, k, 4, e) = sqrt(dot(j, lx, k, e)) * weight
1090 coef%nx(j,k, 3, e) = a(j, 1, k, e)
1091 coef%nx(j,k, 4, e) = -a(j, lx, k, e)
1092 coef%ny(j,k, 3, e) = b(j, 1, k, e)
1093 coef%ny(j,k, 4, e) = -b(j, lx, k, e)
1094 coef%nz(j,k, 3, e) = c(j, 1, k, e)
1095 coef%nz(j,k, 4, e) = -c(j, lx, k, e)
1096 end do
1097 end do
1098 end do
1099
1100 ! dr x ds
1101 do i = 1, n
1102 a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
1103 - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
1104
1105 b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
1106 - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
1107
1108 c(i, 1, 1, 1) = coef%dxdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) &
1109 - coef%dydr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1)
1110 end do
1111
1112 do i = 1, n
1113 dot(i, 1, 1, 1) = a(i, 1, 1, 1) * a(i, 1, 1, 1) &
1114 + b(i, 1, 1, 1) * b(i, 1, 1, 1) &
1115 + c(i, 1, 1, 1) * c(i, 1, 1, 1)
1116 end do
1117
1118 do concurrent(e = 1:coef%msh%nelv)
1119 do concurrent(k = 1:coef%Xh%lx)
1120 do concurrent(j = 1:coef%Xh%lx)
1121 weight = coef%Xh%wx(j) * coef%Xh%wy(k)
1122 coef%area(j, k, 5, e) = sqrt(dot(j, k, 1, e)) * weight
1123 coef%area(j, k, 6, e) = sqrt(dot(j, k, lx, e)) * weight
1124 coef%nx(j,k, 5, e) = -a(j, k, 1, e)
1125 coef%nx(j,k, 6, e) = a(j, k, lx, e)
1126 coef%ny(j,k, 5, e) = -b(j, k, 1, e)
1127 coef%ny(j,k, 6, e) = b(j, k, lx, e)
1128 coef%nz(j,k, 5, e) = -c(j, k, 1, e)
1129 coef%nz(j,k, 6, e) = c(j, k, lx, e)
1130 end do
1131 end do
1132 end do
1133
1134 ! Normalize
1135 do j = 1, size(coef%nz)
1136 len = sqrt(coef%nx(j,1,1,1)**2 + &
1137 coef%ny(j,1,1,1)**2 + coef%nz(j,1,1,1)**2)
1138 if (len .gt. neko_eps) then
1139 coef%nx(j,1,1,1) = coef%nx(j,1,1,1) / len
1140 coef%ny(j,1,1,1) = coef%ny(j,1,1,1) / len
1141 coef%nz(j,1,1,1) = coef%nz(j,1,1,1) / len
1142 end if
1143 end do
1144
1145 deallocate(dot)
1146 deallocate(c)
1147 deallocate(b)
1148 deallocate(a)
1150 if (neko_bcknd_device .eq. 1) then
1151 n = size(coef%area)
1152 call device_memcpy(coef%area, coef%area_d, n, &
1153 host_to_device, sync=.false.)
1154 call device_memcpy(coef%nx, coef%nx_d, n, &
1155 host_to_device, sync=.false.)
1156 call device_memcpy(coef%ny, coef%ny_d, n, &
1157 host_to_device, sync=.false.)
1158 call device_memcpy(coef%nz, coef%nz_d, n, &
1159 host_to_device, sync=.false.)
1160 end if
1161
1162 end subroutine coef_generate_area_and_normal
1163
1164
1165end module coefs
Map a Fortran array to a device (allocate and associate)
Definition device.F90:57
Copy data between host and device (or device and device)
Definition device.F90:51
Coefficients.
Definition coef.f90:34
subroutine coef_generate_geo(c)
Generate geometric data for the given mesh.
Definition coef.f90:824
subroutine coef_free(this)
Deallocate coefficients.
Definition coef.f90:371
pure real(kind=rp) function coef_get_area(this, i, j, k, e, facet)
Definition coef.f90:1000
pure real(kind=rp) function, dimension(3) coef_get_normal(this, i, j, k, e, facet)
Definition coef.f90:979
subroutine coef_generate_dxyzdrst(c)
Definition coef.f90:666
subroutine coef_generate_area_and_normal(coef)
Generate facet area and surface normals.
Definition coef.f90:1017
subroutine coef_init_empty(this, xh, msh)
Initialize empty coefs for a space and a mesh.
Definition coef.f90:160
subroutine coef_init_all(this, gs_h)
Initialize coefficients.
Definition coef.f90:206
subroutine coef_generate_mass(c)
Generate mass matrix B for the given mesh and space.
Definition coef.f90:940
subroutine, public device_coef_generate_dxydrst(drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, dxdr_d, dydr_d, dzdr_d, dxds_d, dyds_d, dzds_d, dxdt_d, dydt_d, dzdt_d, dx_d, dy_d, dz_d, x_d, y_d, z_d, jacinv_d, jac_d, lx, nel)
subroutine, public device_coef_generate_geo(g11_d, g12_d, g13_d, g22_d, g23_d, g33_d, drdx_d, drdy_d, drdz_d, dsdx_d, dsdy_d, dsdz_d, dtdx_d, dtdy_d, dtdz_d, jacinv_d, w3_d, nel, lx, gdim)
subroutine, public device_rone(a_d, n)
Set all elements to one.
subroutine, public device_invcol1(a_d, n)
Invert a vector .
real(kind=rp) function, public device_glsum(a_d, n)
Sum a vector of length n.
Device abstraction, common interface for various accelerators.
Definition device.F90:34
subroutine, public device_free(x_d)
Deallocate memory on the device.
Definition device.F90:185
integer, parameter, public device_to_host
Definition device.F90:47
Defines a mapping of the degrees of freedom.
Definition dofmap.f90:35
Gather-scatter.
Defines Gather-scatter operations.
Definition gs_ops.f90:34
integer, parameter, public gs_op_add
Definition gs_ops.f90:36
Definition math.f90:60
subroutine, public invers2(a, b, n)
Compute inverted vector .
Definition math.f90:499
subroutine, public subcol3(a, b, c, n)
Returns .
Definition math.f90:755
subroutine, public rone(a, n)
Set all elements to one.
Definition math.f90:227
real(kind=rp) function, public glsum(a, n)
Sum a vector of length n.
Definition math.f90:359
subroutine, public addcol3(a, b, c, n)
Returns .
Definition math.f90:800
subroutine, public invcol1(a, n)
Invert a vector .
Definition math.f90:474
subroutine, public chsign(a, n)
Change sign of vector .
Definition math.f90:439
subroutine, public copy(a, b, n)
Copy a vector .
Definition math.f90:238
real(kind=rp), parameter, public neko_eps
Machine epsilon .
Definition math.f90:69
subroutine, public rzero(a, n)
Zero a real vector.
Definition math.f90:194
Defines a mesh.
Definition mesh.f90:34
Wrapper for all matrix-matrix product implementations.
subroutine, public mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product for contiguously packed matrices A,B, and C.
Build configurations.
integer, parameter neko_bcknd_device
integer, parameter, public rp
Global precision used in computations.
Definition num_types.f90:12
Defines a function space.
Definition space.f90:34
Coefficients defined on a given (mesh, ) tuple. Arrays use indices (i,j,k,e): element e,...
Definition coef.f90:55
The function space for the SEM solution fields.
Definition space.f90:62